Skip to content
89 changes: 76 additions & 13 deletions packages/input_schema/src/input_schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ export function parseAjvError(
let fieldKey: string;
let message: string;

// Turns a slash-separated path into a dot-separated one:
// one slash at the very start and one at the very end are dropped, every other slash becomes a dot.
const cleanPropertyName = (name: string) => {
    const withoutEdgeSlashes = name.replace(/^\/|\/$/g, '');
    return withoutEdgeSlashes.split('/').join('.');
};
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is needed because errors can now relate to sub-properties, and we want to show a readable path to the failing property.


// If error is with keyword type, it means that type of input is incorrect
// this can mean that provided value is null
if (error.keyword === 'type') {
Expand All @@ -48,20 +53,23 @@ export function parseAjvError(
}
message = m('inputSchema.validation.generic', { rootName, fieldKey, message: error.message });
} else if (error.keyword === 'required') {
fieldKey = error.params.missingProperty;
fieldKey = cleanPropertyName(`${error.instancePath}/${error.params.missingProperty}`);
message = m('inputSchema.validation.required', { rootName, fieldKey });
} else if (error.keyword === 'additionalProperties') {
fieldKey = error.params.additionalProperty;
fieldKey = cleanPropertyName(`${error.instancePath}/${error.params.additionalProperty}`);
message = m('inputSchema.validation.additionalProperty', { rootName, fieldKey });
} else if (error.keyword === 'unevaluatedProperties') {
fieldKey = cleanPropertyName(`${error.instancePath}/${error.params.unevaluatedProperty}`);
message = m('inputSchema.validation.additionalProperty', { rootName, fieldKey });
} else if (error.keyword === 'enum') {
fieldKey = error.instancePath.split('/').pop()!;
fieldKey = cleanPropertyName(error.instancePath);
const errorMessage = `${error.message}: "${error.params.allowedValues.join('", "')}"`;
message = m('inputSchema.validation.generic', { rootName, fieldKey, message: errorMessage });
} else if (error.keyword === 'const') {
fieldKey = error.instancePath.split('/').pop()!;
fieldKey = cleanPropertyName(error.instancePath);
message = m('inputSchema.validation.generic', { rootName, fieldKey, message: error.message });
} else {
fieldKey = error.instancePath.split('/').pop()!;
fieldKey = cleanPropertyName(error.instancePath);
message = m('inputSchema.validation.generic', { rootName, fieldKey, message: error.message });
}

Expand Down Expand Up @@ -92,11 +100,33 @@ function validateBasicStructure(validator: Ajv, obj: Record<string, unknown>): a

/**
* Validates a particular field against its schema.
* @param validator An instance of AJV validator (must support draft 2019-09).
* @param fieldSchema Schema of the field to validate.
* @param fieldKey Key of the field in the input schema.
* @param subField If true, the field is a sub-field of another field, so we need to skip some definitions.
*/
function validateField(validator: Ajv, fieldSchema: Record<string, unknown>, fieldKey: string): asserts fieldSchema is FieldDefinition {
function validateField(validator: Ajv, fieldSchema: Record<string, unknown>, fieldKey: string, subField = false): asserts fieldSchema is FieldDefinition {
const matchingDefinitions = Object
.values<any>(definitions) // cast as any, as the code in first branch seems to be invalid
.filter((definition) => {
// Because the definitions contain not only the root property definitions, but also sub-schema definitions
// and utility definitions, we need to filter them and validate only against the appropriate ones.
// We do this by checking the prefix of the definition title ("Utils:" or "Sub-schema:").
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't found a better approach than grouping the definitions by their title; it's not that robust, but it works well.
Another approach would be to define definitions, sub_definitions and util_definitions, but those are not valid JSON Schema keywords and wouldn't pass Ajv strict validation.
Or we could nest them as sub-objects like definitions.root, definitions.sub and definitions.utils, but I haven't tested that.

But since this is used just here I think it's ok to go with this solution.


if (definition.title.startsWith('Utils:')) {
// Utility definitions are not used for property validation.
// They are used for their internal logic. Filter them out.
return false;
}
if (!subField && definition.title.startsWith('Sub-schema:')) {
// This is a sub-schema definition, but we are validating a root field, so we skip it.
return false;
}
if (subField && !definition.title.startsWith('Sub-schema:')) {
// This is a normal definition, but we are validating a sub-field, so we skip it.
return false;
}

return definition.properties.type.enum
// This is a normal case where fieldSchema.type can be only one possible value matching definition.properties.type.enum.0
? definition.properties.type.enum[0] === fieldSchema.type
Expand All @@ -110,9 +140,19 @@ function validateField(validator: Ajv, fieldSchema: Record<string, unknown>, fie
throw new Error(`Input schema is not valid (${errorMessage})`);
}

// A field schema is validated against a single picked definition, but that definition may reference other definitions.
// To keep those references resolvable, build a new JSON Schema that has the picked definition's own keys at the root
// and carries all definitions from the `schema.json` under its `definitions` property.
const enhanceDefinition = (definition: object) => ({ ...definition, definitions });

// If there is only one matching then we are done and simply compare it.
if (matchingDefinitions.length === 1) {
validateAgainstSchemaOrThrow(validator, fieldSchema, matchingDefinitions[0], `schema.properties.${fieldKey}`);
validateAgainstSchemaOrThrow(validator, fieldSchema, enhanceDefinition(matchingDefinitions[0]), `schema.properties.${fieldKey}`);
return;
}

Expand All @@ -121,30 +161,49 @@ function validateField(validator: Ajv, fieldSchema: Record<string, unknown>, fie
if ((fieldSchema as StringFieldDefinition).enum) {
const definition = matchingDefinitions.filter((item) => !!item.properties.enum).pop();
if (!definition) throw new Error('Input schema validation failed to find "enum property" definition');
validateAgainstSchemaOrThrow(validator, fieldSchema, definition, `schema.properties.${fieldKey}.enum`);
validateAgainstSchemaOrThrow(validator, fieldSchema, enhanceDefinition(definition), `schema.properties.${fieldKey}.enum`);
return;
}
// If the definition contains "resourceType" property then it's resource type.
if ((fieldSchema as CommonResourceFieldDefinition<unknown>).resourceType) {
const definition = matchingDefinitions.filter((item) => !!item.properties.resourceType).pop();
if (!definition) throw new Error('Input schema validation failed to find "resource property" definition');
validateAgainstSchemaOrThrow(validator, fieldSchema, definition, `schema.properties.${fieldKey}`);
validateAgainstSchemaOrThrow(validator, fieldSchema, enhanceDefinition(definition), `schema.properties.${fieldKey}`);
return;
}
// Otherwise we use the other definition.
const definition = matchingDefinitions.filter((item) => !item.properties.enum && !item.properties.resourceType).pop();
if (!definition) throw new Error('Input schema validation failed to find other than "enum property" definition');

validateAgainstSchemaOrThrow(validator, fieldSchema, definition, `schema.properties.${fieldKey}`);
validateAgainstSchemaOrThrow(validator, fieldSchema, enhanceDefinition(definition), `schema.properties.${fieldKey}`);
}

/**
 * Recursively validates every sub-field listed in `fieldSchema.properties`.
 *
 * For each sub-field, its own nested sub-fields (if it has a `properties` object) are validated first,
 * so that the reported error points at the most specific location; only then is the sub-field itself validated.
 *
 * @param validator An instance of AJV validator, passed through to `validateField`.
 * @param fieldSchema Schema of the parent field whose `properties` are walked.
 * @param fieldKey Dot-separated key of the parent field; sub-field keys are appended to it.
 */
function validateSubFields(validator: Ajv, fieldSchema: InputSchemaBaseChecked, fieldKey: string) {
    for (const [subFieldKey, subFieldSchema] of Object.entries(fieldSchema.properties)) {
        const subFieldPath = `${fieldKey}.${subFieldKey}`;

        // Descend first: deeper errors are more relevant than an error reported on the parent.
        const nestedProperties = (subFieldSchema as any).properties;
        if (nestedProperties) {
            validateSubFields(validator, subFieldSchema as any as InputSchemaBaseChecked, subFieldPath);
        }

        // `true` marks this as a sub-field for `validateField`.
        validateField(validator, subFieldSchema, subFieldPath, true);
    }
}

/**
 * Validates all properties in the input schema.
 *
 * Each root field is visited exactly once. If the field declares its own `properties`, those sub-fields
 * (and their sub-fields) are validated first, so that the most specific error is the one reported;
 * the root field itself is validated afterwards.
 *
 * @param inputSchema Input schema whose `properties` are validated.
 * @param validator An instance of AJV validator, passed through to the field validators.
 * @throws Whatever `validateSubFields` / `validateField` throw for the first invalid field.
 */
function validateProperties(inputSchema: InputSchemaBaseChecked, validator: Ajv): asserts inputSchema is InputSchema {
    Object.entries(inputSchema.properties).forEach(([fieldKey, fieldSchema]) => {
        // The sub-properties have to be validated first, so that we get more relevant error messages.
        if ((fieldSchema as any).properties) {
            // If the field has sub-fields, we need to validate them as well.
            validateSubFields(validator, fieldSchema as any as InputSchemaBaseChecked, fieldKey);
        }
        validateField(validator, fieldSchema, fieldKey);
    });
}

/**
Expand All @@ -168,6 +227,10 @@ export function validateExistenceOfRequiredFields(inputSchema: InputSchema) {
* then checks that all required fields are present and finally checks fully against the whole schema.
*
* This way we get the most accurate error message for user.
*
* @param validator An instance of AJV validator. Important: The JSON Schema that the passed input schema is validated against
* is using features from JSON Schema 2019 draft, so the AJV instance must support it.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR has already more than doubled in length, but without utilising unevaluatedProperties (https://json-schema.org/understanding-json-schema/reference/object#unevaluatedproperties) from draft 2019-09 it would be much longer, with a lot of duplicated definitions. (There is also room to update the existing definitions, replacing additionalProperties with unevaluatedProperties to make the schema smaller and cleaner.)

But to support this, the Ajv validator instance passed to the validateInputSchema function has to support this draft, so we need to update the Ajv import in all places that call this function from:

import Ajv from 'ajv'

to

import Ajv from 'ajv/dist/2019'

It should be these places (we should change it together with bumping version of @apify/input_schema once this PR is merged):

  • apify/apify-worker - when validating schema during build
  • apify/apify-cli - validate-schema command
  • apify/apify-core - just admin input schema playground

Note: the draft 2019-09 is used only to validate input-schema against our meta JSON Schema, but input-schema itself has only features from draft 07 so when validating input against input-schema we don't need to change anything.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we maybe add createAjv or some other mechanism that would make sure that the correct schema version is used every time, without the need to check every call of the function? Or maybe there is some property on the Ajv instance that we could check?

Copy link
Member Author

@mfori mfori Jul 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added a check (ensureAjvSupportsDraft2019) that validates this or throws an exception. Do you think that's enough?

* @param inputSchema Input schema to validate.
*/
export function validateInputSchema(validator: Ajv, inputSchema: Record<string, unknown>): asserts inputSchema is InputSchema {
// First validate just basic structure without fields.
Expand Down
Loading