Skip to content

Commit a50fc02

Browse files
authored
feat!: improve log sanitization (#550)
1 parent 3be3417 commit a50fc02

File tree

4 files changed

+230
-50
lines changed

4 files changed

+230
-50
lines changed

packages/log/src/log.ts

Lines changed: 59 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import { LogFormat, LogLevel, PREFIX_DELIMITER } from './log_consts';
2-
import { getFormatFromEnv, getLevelFromEnv, limitDepth } from './log_helpers';
1+
import { LogFormat, LogLevel, PREFERRED_FIELDS, PREFIX_DELIMITER, TRUNCATION_FLAG_KEY, TRUNCATION_SUFFIX } from './log_consts';
2+
import { getFormatFromEnv, getLevelFromEnv, sanitizeData } from './log_helpers';
33
import type { Logger } from './logger';
44
import { LoggerJson } from './logger_json';
55
import { LoggerText } from './logger_text';
@@ -12,12 +12,31 @@ export interface LoggerOptions {
1212
level?: number;
1313
/** Max depth of data object that will be logged. Anything deeper than the limit will be stripped off. */
1414
maxDepth?: number;
15+
/**
16+
* Factor by which the limits (`maxStringLength`, `maxArrayLength`, `maxFields`) will be adjusted at each depth level.
17+
*
18+
* Examples
19+
* - If the factor is 0.5, the limits will be halved at each depth level.
20+
* - If the factor is 1, the limits will be kept the same at each depth level.
21+
* - If the factor is 2, the limits will be doubled at each depth level.
22+
*/
23+
gradualLimitFactor?: number;
1524
/** Max length of the string to be logged. Longer strings will be truncated. */
1625
maxStringLength?: number;
26+
/** Max number of array items to be logged. More items will be omitted. */
27+
maxArrayLength?: number;
28+
/** Max number of fields to be logged. More fields will be omitted. */
29+
maxFields?: number;
30+
/** Ordered list of fields that should be prioritized when logging objects. */
31+
preferredFields?: PropertyKey[];
1732
/** Prefix to be prepended to each logged line. */
1833
prefix?: string | null;
1934
/** Suffix that will be appended to each logged line. */
2035
suffix?: string | null;
36+
/** Suffix that will be appended to truncated strings, objects and arrays. */
37+
truncationSuffix?: string;
38+
/** Key of the flag property that will be added to the object if it is truncated. */
39+
truncationFlagKey?: string;
2140
/**
2241
* Logger implementation to be used. Default one is log.LoggerText to log messages as easily readable
2342
* strings. Optionally you can use `log.LoggerJson` that formats each log line as a JSON.
@@ -27,6 +46,8 @@ export interface LoggerOptions {
2746
data?: Record<string, unknown>,
2847
}
2948

49+
type AdditionalData = Record<string, any> | null;
50+
3051
const getLoggerForFormat = (format: LogFormat): Logger => {
3152
switch (format) {
3253
case LogFormat.JSON:
@@ -40,15 +61,19 @@ const getLoggerForFormat = (format: LogFormat): Logger => {
4061
const getDefaultOptions = () => ({
4162
level: getLevelFromEnv(),
4263
maxDepth: 4,
43-
maxStringLength: 2000,
64+
gradualLimitFactor: 1 / 2, // at each depth level, the limits will be reduced by half
65+
maxStringLength: 1000,
66+
maxArrayLength: 500,
67+
maxFields: 20,
68+
preferredFields: [...PREFERRED_FIELDS],
4469
prefix: null,
4570
suffix: null,
71+
truncationSuffix: TRUNCATION_SUFFIX,
72+
truncationFlagKey: TRUNCATION_FLAG_KEY,
4673
logger: getLoggerForFormat(getFormatFromEnv()),
4774
data: {},
4875
});
4976

50-
type AdditionalData = Record<string, any> | null;
51-
5277
/**
5378
* The log instance enables level aware logging of messages and we advise
5479
* to use it instead of `console.log()` and its aliases in most development
@@ -122,22 +147,47 @@ export class Log {
122147

123148
private options: Required<LoggerOptions>;
124149

150+
/** Maps preferred fields to their index for faster lookup */
151+
private readonly preferredFieldsMap: Record<string, number>;
152+
125153
private readonly warningsOnceLogged: Set<string> = new Set();
126154

127155
constructor(options: Partial<LoggerOptions> = {}) {
128156
this.options = { ...getDefaultOptions(), ...options };
129157

130158
if (!LogLevel[this.options.level]) throw new Error('Options "level" must be one of log.LEVELS enum!');
131159
if (typeof this.options.maxDepth !== 'number') throw new Error('Options "maxDepth" must be a number!');
160+
if (typeof this.options.gradualLimitFactor !== 'number') throw new Error('Options "gradualLimitFactor" must be a number!');
132161
if (typeof this.options.maxStringLength !== 'number') throw new Error('Options "maxStringLength" must be a number!');
162+
if (typeof this.options.maxArrayLength !== 'number') throw new Error('Options "maxArrayLength" must be a number!');
163+
if (typeof this.options.maxFields !== 'number') throw new Error('Options "maxFields" must be a number!');
164+
if (!Array.isArray(this.options.preferredFields)) throw new Error('Options "preferredFields" must be an array!');
133165
if (this.options.prefix && typeof this.options.prefix !== 'string') throw new Error('Options "prefix" must be a string!');
134166
if (this.options.suffix && typeof this.options.suffix !== 'string') throw new Error('Options "suffix" must be a string!');
167+
if (typeof this.options.truncationSuffix !== 'string') throw new Error('Options "truncationSuffix" must be a string!');
168+
if (typeof this.options.truncationFlagKey !== 'string') throw new Error('Options "truncationFlagKey" must be a string!');
135169
if (typeof this.options.logger !== 'object') throw new Error('Options "logger" must be an object!');
136170
if (typeof this.options.data !== 'object') throw new Error('Options "data" must be an object!');
171+
172+
this.preferredFieldsMap = Object.fromEntries(
173+
this.options.preferredFields.map((field, index) => [field, index]),
174+
);
137175
}
138176

139-
private _limitDepth(obj: any) {
140-
return limitDepth(obj, this.options.maxDepth);
177+
private _sanitizeData(obj: any) {
178+
return sanitizeData(
179+
obj,
180+
{
181+
maxDepth: this.options.maxDepth,
182+
gradualLimitFactor: this.options.gradualLimitFactor,
183+
maxStringLength: this.options.maxStringLength,
184+
maxArrayLength: this.options.maxArrayLength,
185+
maxFields: this.options.maxFields,
186+
preferredFieldsMap: this.preferredFieldsMap,
187+
truncationSuffix: this.options.truncationSuffix,
188+
truncationFlagKey: this.options.truncationFlagKey,
189+
},
190+
);
141191
}
142192

143193
/**
@@ -170,8 +220,8 @@ export class Log {
170220
if (level > this.options.level) return;
171221

172222
data = { ...this.options.data, ...data };
173-
data = Reflect.ownKeys(data).length > 0 ? this._limitDepth(data) : undefined;
174-
exception = this._limitDepth(exception);
223+
data = Reflect.ownKeys(data).length > 0 ? this._sanitizeData(data) : undefined;
224+
exception = this._sanitizeData(exception);
175225

176226
this.options.logger.log(level, message, data, exception, {
177227
prefix: this.options.prefix,

packages/log/src/log_consts.ts

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,83 @@ export enum LogLevel {
1313
PERF = 6,
1414
}
1515

16+
export const LEVELS = LogLevel;
17+
// Inverse of LEVELS = maps log level to string.
18+
export const LEVEL_TO_STRING = Object.keys(LogLevel).filter((x) => Number.isNaN(+x));
19+
1620
export enum LogFormat {
1721
JSON = 'JSON',
1822
TEXT = 'TEXT',
1923
}
2024

21-
export const PREFIX_DELIMITER = ':';
22-
export const LEVELS = LogLevel;
23-
24-
// Inverse of LOG_LEVELS = maps log level to string.
25-
export const LEVEL_TO_STRING = Object.keys(LogLevel).filter((x) => Number.isNaN(+x));
26-
2725
/**
2826
* A symbol used to mark a limited depth object as having come from an error
2927
* @internal
3028
*/
3129
export const IS_APIFY_LOGGER_EXCEPTION = Symbol('apify.processed_error');
30+
31+
export const PREFIX_DELIMITER = ':';
32+
33+
export const TRUNCATION_FLAG_KEY = '[TRUNCATED]';
34+
export const TRUNCATION_SUFFIX = '...[truncated]';
35+
36+
/** ID fields used in Apify system */
37+
export const PREFERRED_ID_FIELDS = [
38+
'_id',
39+
'id',
40+
'userId',
41+
'impersonatedUserId',
42+
'impersonatingUserId',
43+
'adminUserId',
44+
'actorId',
45+
'actorTaskId',
46+
'taskId',
47+
'buildId',
48+
'buildNumber',
49+
'runId',
50+
] as const;
51+
52+
/** Standard JS Error fields */
53+
export const PREFERRED_ERROR_FIELDS = [
54+
'name',
55+
'message',
56+
'stack',
57+
'cause',
58+
] as const;
59+
60+
/** Standard HTTP / network-related fields */
61+
export const PREFERRED_HTTP_FIELDS = [
62+
'url',
63+
'method',
64+
'code',
65+
'status',
66+
'statusCode',
67+
'statusText',
68+
] as const;
69+
70+
/** API error fields used in Apify system */
71+
export const PREFERRED_API_ERROR_FIELDS = [
72+
'errorCode',
73+
'errorMessage',
74+
'errorResponse',
75+
] as const;
76+
77+
/** Potentially large or nested data fields */
78+
export const PREFERRED_DATA_FIELDS = [
79+
'response',
80+
'request',
81+
'data',
82+
'payload',
83+
'details',
84+
'exception',
85+
'config',
86+
'headers',
87+
] as const;
88+
89+
export const PREFERRED_FIELDS = [
90+
...PREFERRED_ID_FIELDS,
91+
...PREFERRED_ERROR_FIELDS,
92+
...PREFERRED_HTTP_FIELDS,
93+
...PREFERRED_API_ERROR_FIELDS,
94+
...PREFERRED_DATA_FIELDS,
95+
] as const;

packages/log/src/log_helpers.ts

Lines changed: 94 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
import { APIFY_ENV_VARS } from '@apify/consts';
22

3-
import { IS_APIFY_LOGGER_EXCEPTION, LogFormat, LogLevel } from './log_consts';
3+
import {
4+
IS_APIFY_LOGGER_EXCEPTION,
5+
LogFormat,
6+
LogLevel,
7+
TRUNCATION_FLAG_KEY,
8+
TRUNCATION_SUFFIX,
9+
} from './log_consts';
410

511
/**
612
* Ensures a string is shorter than a specified number of characters, and truncates it if not, appending a specific suffix to it.
713
* (copied from utilities package so the logger does not have to depend on all of its dependencies)
814
*/
9-
export function truncate(str: string, maxLength: number, suffix = '...[truncated]'): string {
15+
export function truncate(str: string, maxLength: number, suffix = TRUNCATION_SUFFIX): string {
1016
maxLength = Math.floor(maxLength);
1117

1218
// TODO: we should just ignore rest of the suffix...
@@ -53,55 +59,115 @@ export function getFormatFromEnv(): LogFormat {
5359
}
5460
}
5561

62+
type SanitizeDataOptions = {
63+
maxDepth?: number;
64+
gradualLimitFactor?: number;
65+
maxStringLength?: number;
66+
maxArrayLength?: number;
67+
maxFields?: number;
68+
preferredFieldsMap?: Record<PropertyKey, number>;
69+
truncationSuffix?: string;
70+
truncationFlagKey?: string;
71+
};
72+
5673
/**
 * Sanitizes arbitrary data so that it can be handed over to a logger.
 *
 * - Strings longer than `maxStringLength` are shortened with `truncate()` using `truncationSuffix`.
 * - Numbers, booleans, symbols, bigints, `null`, `undefined` and `Date` instances are returned as they are.
 * - `Error` instances are converted to plain objects (`name`, `message`, `stack`, `cause` plus their own
 *   enumerable properties) and marked with the `IS_APIFY_LOGGER_EXCEPTION` symbol. The marker is always
 *   kept and does not count towards `maxFields`.
 * - Arrays keep at most `maxArrayLength` items; when items are omitted, `truncationSuffix` is appended
 *   as the last item.
 * - Objects keep at most `maxFields` fields. Fields listed in `preferredFieldsMap` come first (ordered by
 *   their index), the remaining ones keep their original order. When fields are omitted, the property
 *   `truncationFlagKey` is set to `true` on the result.
 * - Arrays and objects nested deeper than `maxDepth` are replaced by `'[array]'` / `'[object]'`.
 * - Functions are replaced by `'[function]'`.
 *
 * For every nesting level, `maxStringLength`, `maxArrayLength` and `maxFields` are multiplied by
 * `gradualLimitFactor` and rounded down; `maxStringLength` never drops below the length of `truncationSuffix`.
 *
 * @param data Value to be sanitized. It is never mutated.
 * @param options Limits to apply; every omitted limit means "unlimited".
 * @returns The sanitized copy, or `undefined` for a value that cannot be logged.
 */
export function sanitizeData(data: unknown, options: SanitizeDataOptions): unknown {
    const {
        maxDepth = Infinity,
        gradualLimitFactor = 1,
        maxStringLength = Infinity,
        maxArrayLength = Infinity,
        maxFields = Infinity,
        preferredFieldsMap = {},
        truncationSuffix = TRUNCATION_SUFFIX,
        truncationFlagKey = TRUNCATION_FLAG_KEY,
    } = options;

    // handle common cases quickly
    if (typeof data === 'string') {
        return data.length > maxStringLength
            ? truncate(data, maxStringLength, truncationSuffix)
            : data;
    }

    if (['number', 'boolean', 'symbol', 'bigint'].includes(typeof data) || data == null || data instanceof Date) {
        return data;
    }

    // WORKAROUND: Error's properties are not iterable, convert it to a simple object and preserve custom properties
    // NOTE: _.isError() doesn't work on Match.Error
    if (data instanceof Error) {
        const { name, message, stack, cause, ...rest } = data as Error & { cause?: unknown };
        data = { name, message, stack, cause, ...rest, [IS_APIFY_LOGGER_EXCEPTION]: true };
    }

    // An unlimited limit multiplied by a zero factor yields NaN, which would silently empty nested
    // arrays and switch off truncation; in that case the limit is left unchanged.
    const scaleLimit = (limit: number): number => {
        const scaled = Math.floor(limit * gradualLimitFactor);
        return Number.isNaN(scaled) ? limit : scaled;
    };

    // The options for the next nesting level are the same for every child, so build them only once.
    const childOptions: SanitizeDataOptions = {
        ...options,
        maxDepth: maxDepth - 1,
        maxStringLength: Math.max(
            scaleLimit(maxStringLength),
            truncationSuffix.length, // always at least the length of the truncation suffix
        ),
        maxArrayLength: scaleLimit(maxArrayLength),
        maxFields: scaleLimit(maxFields),
    };
    const sanitizeChild = (child: unknown): unknown => sanitizeData(child, childOptions);

    if (Array.isArray(data)) {
        if (maxDepth <= 0) return '[array]';

        const sanitized: unknown[] = data
            .slice(0, maxArrayLength)
            .map((item) => sanitizeChild(item));

        if (data.length > maxArrayLength) {
            sanitized.push(truncationSuffix);
        }

        return sanitized;
    }

    if (typeof data === 'object' && data !== null) {
        if (maxDepth <= 0) return '[object]';

        const source = data as Record<PropertyKey, unknown>;
        const hasOwn = (obj: object, key: PropertyKey): boolean => Object.prototype.hasOwnProperty.call(obj, key);

        // Only own numeric entries of the map count. A plain lookup would also hit inherited members
        // such as `constructor` or `toString` and treat equally named data fields as preferred.
        const preferredIndex = (key: string | symbol): number => {
            if (!hasOwn(preferredFieldsMap, key)) return -1;
            const index = preferredFieldsMap[key];
            return typeof index === 'number' ? index : -1;
        };

        // The error marker is handled separately so that it can never be pushed out by maxFields.
        const hasErrorMarker = hasOwn(source, IS_APIFY_LOGGER_EXCEPTION);
        const allKeys = Reflect.ownKeys(source).filter((key) => key !== IS_APIFY_LOGGER_EXCEPTION);

        // Preferred fields go first (by index), the others keep their original relative order
        const preferredKeys = allKeys
            .filter((key) => preferredIndex(key) !== -1)
            .sort((a, b) => preferredIndex(a) - preferredIndex(b));
        const otherKeys = allKeys.filter((key) => preferredIndex(key) === -1);
        const orderedKeys = [...preferredKeys, ...otherKeys];

        // Sanitize only up to maxFields fields (keeping preferred ones first)
        const sanitized: Record<PropertyKey, unknown> = {};
        for (const key of orderedKeys.slice(0, maxFields)) {
            sanitized[key] = sanitizeChild(source[key]);
        }

        if (hasErrorMarker) {
            sanitized[IS_APIFY_LOGGER_EXCEPTION] = sanitizeChild(source[IS_APIFY_LOGGER_EXCEPTION]);
        }

        if (orderedKeys.length > maxFields) {
            sanitized[truncationFlagKey] = true;
        }

        return sanitized;
    }

    // Replaces all function with [function] string
    if (typeof data === 'function') {
        return '[function]';
    }

    // this shouldn't happen
    // eslint-disable-next-line no-console
    console.log(`WARNING: Object cannot be logged: ${data}`);

    return undefined;
}

0 commit comments

Comments
 (0)