diff --git a/.github/workflows/validate-tooling-data.yml b/.github/workflows/validate-tooling-data.yml
index 5ec227928..5c3b02877 100644
--- a/.github/workflows/validate-tooling-data.yml
+++ b/.github/workflows/validate-tooling-data.yml
@@ -47,6 +47,58 @@ jobs:
             const data = yaml.load(fs.readFileSync(dataPath, 'utf-8'));
             const ajv = new Ajv({ allErrors: true });
             addFormats(ajv);
+
+            // Custom keyword: fails when an array holds language names that
+            // differ only by letter case (e.g. "Java" and "java").
+            ajv.addKeyword({
+              keyword: 'caseInsensitiveUnique',
+              type: 'array',
+              // Named function expression on purpose: Ajv reads custom errors
+              // from the `.errors` property of this keyword function. A bare
+              // `validate` identifier in here would resolve to the compiled
+              // schema validator declared below, and the error would be lost.
+              validate: function caseInsensitiveUnique(schema, data) {
+                if (!Array.isArray(data)) return false;
+
+                // Lowercased name -> set of distinct spellings seen for it.
+                const spellingsByLowercase = new Map();
+                const addLanguage = (language) => {
+                  // Non-string entries are left to the schema's type checks
+                  // instead of throwing on `toLowerCase`.
+                  if (typeof language !== 'string') return;
+                  const key = language.toLowerCase();
+                  if (!spellingsByLowercase.has(key)) {
+                    spellingsByLowercase.set(key, new Set());
+                  }
+                  spellingsByLowercase.get(key).add(language);
+                };
+
+                data.forEach((entry) => {
+                  if (typeof entry === 'string') {
+                    // Keyword attached to a `languages` array of strings.
+                    addLanguage(entry);
+                  } else if (entry && Array.isArray(entry.languages)) {
+                    // Keyword attached to an array of tool objects.
+                    entry.languages.forEach(addLanguage);
+                  }
+                });
+
+                // More than one spelling per lowercased key means the names
+                // collide case-insensitively; exact repeats collapse in the Set.
+                const duplicates = [...spellingsByLowercase.entries()]
+                  .filter(([, spellings]) => spellings.size > 1);
+                if (duplicates.length === 0) return true;
+
+                console.error('Duplicate languages found');
+                duplicates.forEach(([key, spellings]) => {
+                  console.error('Duplicate found for:', key, [...spellings]);
+                });
+                caseInsensitiveUnique.errors = [{
+                  keyword: 'caseInsensitiveUnique',
+                  message: 'array contains case-insensitive duplicates',
+                  params: { keyword: 'caseInsensitiveUnique' }
+                }];
+                return false;
+              }
+            });
+
             const validate = ajv.compile(schema);
             const valid = validate(data);
             if (!valid) {
diff --git a/data/tooling-data.schema.json b/data/tooling-data.schema.json
index 75d0d435c..29b6ef547 100644
--- a/data/tooling-data.schema.json
+++ b/data/tooling-data.schema.json
@@ -88,6 +88,7 @@
       "languages": {
         "description": "The language or languages a tool is built in. In the case of a validator, this will likely be the language it is written in. In the case of a conversion or transformation tool, these are the languages that are supported in some capacity.",
         "type": "array",
+        "caseInsensitiveUnique": true,
         "items": {
           "description": "Individual language name, from the list unless not included.",
           "type": "string",