Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 25 additions & 12 deletions packages/host-metrics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ const hostMetrics = new HostMetrics({ meterProvider });
hostMetrics.start();
```

## Configuration

| Option | Type | Description |
| -------------- | ---------- | ----------- |
| `metricGroups` | `string[]` | Optional list of metric groups to collect. This package can collect many metrics, which are grouped by metric name prefix (see the "Semantic Conventions" section below). If this option is omitted, all metrics are collected. If it is specified, only metrics from the named groups are collected. For example, `metricGroups: ['process.cpu', 'process.memory']` limits collection to the three metrics in those two groups: `process.cpu.time`, `process.cpu.utilization`, and `process.memory.usage`. |

## Semantic Conventions

This package uses Semantic Conventions [Version 1.25.0](https://github.com/open-telemetry/semantic-conventions/tree/v1.25.0/docs/system).
Expand All @@ -48,18 +54,25 @@ Ref: [opentelemetry-js/issues/4235](https://github.com/open-telemetry/openteleme

Metrics collected:

| Metric | Short Description |
| --------------------------- | --------------------------------------------------------- |
| `system.cpu.time` | Seconds each logical CPU spent on each mode |
| `system.cpu.utilization` | CPU usage time (0-1) |
| `system.memory.usage` | Reports memory in use by state |
| `system.memory.utilization` | Memory usage (0-1) |
| `system.network.dropped` | Count of packets that are dropped |
| `system.network.errors` | Count of network errors detected |
| `system.network.io` | Network flow direction |
| `process.cpu.time` | Total CPU seconds |
| `process.cpu.utilization` | Difference in process.cpu.time since the last measurement |
| `process.memory.usage` | The amount of physical memory in use |
| Metric | Short Description |
| ----------------------------- | --------------------------------------------------------- |
| **Group `system.cpu`** | |
| `system.cpu.time` | Seconds each logical CPU spent on each mode |
| `system.cpu.utilization` | CPU usage time (0-1) |
| **Group `system.memory`** | |
| `system.memory.usage` | Reports memory in use by state |
| `system.memory.utilization` | Memory usage (0-1) |
| **Group `system.network`** | |
| `system.network.dropped` | Count of packets that are dropped |
| `system.network.errors` | Count of network errors detected |
| `system.network.io` | Network flow direction |
| **Group `process.cpu`** | |
| `process.cpu.time` | Total CPU seconds |
| `process.cpu.utilization` | Difference in process.cpu.time since the last measurement |
| **Group `process.memory`** | |
| `process.memory.usage` | The amount of physical memory in use |

Note: the bold "Group" rows in the table above list the group names accepted by the `metricGroups` configuration option.

Attributes collected:

Expand Down
6 changes: 4 additions & 2 deletions packages/host-metrics/src/BaseMetrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

import { Meter, MeterProvider, diag, metrics } from '@opentelemetry/api';
import { Meter, MeterProvider, metrics } from '@opentelemetry/api';

/** @knipignore */
import { PACKAGE_NAME, PACKAGE_VERSION } from './version';
Expand All @@ -27,6 +27,7 @@ export interface MetricsCollectorConfig {
meterProvider?: MeterProvider;
// Name of component
name?: string;
metricGroups?: string[];
}

const DEFAULT_NAME = PACKAGE_NAME;
Expand All @@ -35,16 +36,17 @@ const DEFAULT_NAME = PACKAGE_NAME;
* Base Class for metrics
*/
export abstract class BaseMetrics {
protected _logger = diag;
protected _meter: Meter;
private _name: string;
protected _metricGroups: Array<string> | undefined;

constructor(config?: MetricsCollectorConfig) {
// Do not use `??` operator to allow falling back to default when the
// specified name is an empty string.
this._name = config?.name || DEFAULT_NAME;
const meterProvider = config?.meterProvider ?? metrics.getMeterProvider();
this._meter = meterProvider.getMeter(this._name, PACKAGE_VERSION);
this._metricGroups = config?.metricGroups;
}

/**
Expand Down
200 changes: 114 additions & 86 deletions packages/host-metrics/src/metric.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,102 +205,130 @@ export class HostMetrics extends BaseMetrics {
* Creates metrics
*/
protected _createMetrics(): void {
this._cpuTime = this._meter.createObservableCounter(
METRIC_SYSTEM_CPU_TIME,
{
description: 'Cpu time in seconds',
unit: 's',
}
);
this._cpuUtilization = this._meter.createObservableGauge(
METRIC_SYSTEM_CPU_UTILIZATION,
{
description: 'Cpu usage time 0-1',
}
);
const observables = [];

this._memoryUsage = this._meter.createObservableGauge(
METRIC_SYSTEM_MEMORY_USAGE,
{
description: 'Memory usage in bytes',
}
);
this._memoryUtilization = this._meter.createObservableGauge(
METRIC_SYSTEM_MEMORY_UTILIZATION,
{
description: 'Memory usage 0-1',
}
);
const systemCpuGroupEnabled =
!this._metricGroups || this._metricGroups.includes('system.cpu');
const systemMemoryGroupEnabled =
!this._metricGroups || this._metricGroups.includes('system.memory');
const systemNetworkGroupEnabled =
!this._metricGroups || this._metricGroups.includes('system.network');
const processCpuGroupEnabled =
!this._metricGroups || this._metricGroups.includes('process.cpu');
const processMemoryGroupEnabled =
!this._metricGroups || this._metricGroups.includes('process.memory');

this._networkDropped = this._meter.createObservableCounter(
// There is no semconv pkg export for this in v1.37.0 because
// https://github.com/open-telemetry/semantic-conventions/issues/2828.
// TODO: update to `METRIC_SYSTEM_NETWORK_PACKET_DROPPED` (breaking change)
'system.network.dropped',
{
description: 'Network dropped packets',
}
);
this._networkErrors = this._meter.createObservableCounter(
METRIC_SYSTEM_NETWORK_ERRORS,
{
description: 'Network errors counter',
}
);
this._networkIo = this._meter.createObservableCounter(
METRIC_SYSTEM_NETWORK_IO,
{
description: 'Network transmit and received bytes',
}
);
if (systemCpuGroupEnabled) {
this._cpuTime = this._meter.createObservableCounter(
METRIC_SYSTEM_CPU_TIME,
{
description: 'Cpu time in seconds',
unit: 's',
}
);
observables.push(this._cpuTime);
this._cpuUtilization = this._meter.createObservableGauge(
METRIC_SYSTEM_CPU_UTILIZATION,
{
description: 'Cpu usage time 0-1',
}
);
observables.push(this._cpuUtilization);
}

this._processCpuTime = this._meter.createObservableCounter(
METRIC_PROCESS_CPU_TIME,
{
description: 'Process Cpu time in seconds',
unit: 's',
}
);
this._processCpuUtilization = this._meter.createObservableGauge(
METRIC_PROCESS_CPU_UTILIZATION,
{
description: 'Process Cpu usage time 0-1',
}
);
this._processMemoryUsage = this._meter.createObservableGauge(
METRIC_PROCESS_MEMORY_USAGE,
{
description: 'Process Memory usage in bytes',
}
);
if (systemMemoryGroupEnabled) {
this._memoryUsage = this._meter.createObservableGauge(
METRIC_SYSTEM_MEMORY_USAGE,
{
description: 'Memory usage in bytes',
}
);
observables.push(this._memoryUsage);
this._memoryUtilization = this._meter.createObservableGauge(
METRIC_SYSTEM_MEMORY_UTILIZATION,
{
description: 'Memory usage 0-1',
}
);
observables.push(this._memoryUtilization);
}

this._meter.addBatchObservableCallback(
async observableResult => {
const cpuUsages = getCpuUsageData();
const memoryUsages = getMemoryData();
const processCpuUsages = getProcessCpuUsageData();
const processMemoryUsages = getProcessMemoryData();
const networkData = await getNetworkData();
if (systemNetworkGroupEnabled) {
this._networkDropped = this._meter.createObservableCounter(
// There is no semconv pkg export for this in v1.37.0 because
// https://github.com/open-telemetry/semantic-conventions/issues/2828.
// TODO: update to `METRIC_SYSTEM_NETWORK_PACKET_DROPPED` (breaking change)
'system.network.dropped',
{
description: 'Network dropped packets',
}
);
observables.push(this._networkDropped);
this._networkErrors = this._meter.createObservableCounter(
METRIC_SYSTEM_NETWORK_ERRORS,
{
description: 'Network errors counter',
}
);
observables.push(this._networkErrors);
this._networkIo = this._meter.createObservableCounter(
METRIC_SYSTEM_NETWORK_IO,
{
description: 'Network transmit and received bytes',
}
);
observables.push(this._networkIo);
}

if (processCpuGroupEnabled) {
this._processCpuTime = this._meter.createObservableCounter(
METRIC_PROCESS_CPU_TIME,
{
description: 'Process Cpu time in seconds',
unit: 's',
}
);
observables.push(this._processCpuTime);
this._processCpuUtilization = this._meter.createObservableGauge(
METRIC_PROCESS_CPU_UTILIZATION,
{
description: 'Process Cpu usage time 0-1',
}
);
observables.push(this._processCpuUtilization);
}
if (processMemoryGroupEnabled) {
this._processMemoryUsage = this._meter.createObservableGauge(
METRIC_PROCESS_MEMORY_USAGE,
{
description: 'Process Memory usage in bytes',
}
);
observables.push(this._processMemoryUsage);
}

this._meter.addBatchObservableCallback(async observableResult => {
if (systemCpuGroupEnabled) {
const cpuUsages = getCpuUsageData();
this._batchUpdateCpuUsages(observableResult, cpuUsages);
}
if (systemMemoryGroupEnabled) {
const memoryUsages = getMemoryData();
this._batchUpdateMemUsages(observableResult, memoryUsages);
}
if (processCpuGroupEnabled) {
const processCpuUsages = getProcessCpuUsageData();
this._batchUpdateProcessCpuUsages(observableResult, processCpuUsages);
}
if (processMemoryGroupEnabled) {
const processMemoryUsages = getProcessMemoryData();
this._batchUpdateProcessMemUsage(observableResult, processMemoryUsages);
}
if (systemNetworkGroupEnabled) {
const networkData = await getNetworkData();
this._batchUpdateNetworkData(observableResult, networkData);
},
[
this._cpuTime,
this._cpuUtilization,
this._memoryUsage,
this._memoryUtilization,
this._processCpuTime,
this._processCpuUtilization,
this._processMemoryUsage,
this._networkDropped,
this._networkErrors,
this._networkIo,
]
);
}
}, observables);
}

/**
Expand Down
Loading