Skip to content

Commit 35486e9

Browse files
committed
Improve Chinese character support with robust Unicode property union
Replace the hard-coded Unicode range with a comprehensive Unicode property approach to fix incomplete Han character coverage in MCP tool name formatting.

Changes:
- Replace \u4e00-\u9fa5 range with union of Unicode Script and Block properties
- Use \p{IsHan} + \p{InCJK_Unified_Ideographs} + \p{InCJK_Compatibility_Ideographs}
- Fix boundary case where \u9fff was incorrectly excluded by script-only approach
- Add comprehensive test coverage for all Han character blocks and edge cases

Technical details:
- Addresses Unicode Script vs Block classification differences across JDK versions
- \u9fff (鿿) is in the CJK Unified Ideographs block but not in the Han script in some JDKs
- Union approach ensures complete coverage while maintaining exclusion of other scripts
- Future-proof solution that automatically includes new Han characters in Unicode updates

Test coverage added:
- CJK Unified Ideographs boundary cases (\u4e00, \u9fff)
- CJK Extension A characters (\u3400)
- CJK Compatibility Ideographs (\uf900)
- Mixed character block scenarios
- Proper exclusion verification for non-Han scripts (Hiragana, Emoji, etc.)

Fixes incomplete Chinese character support while maintaining backward compatibility and the minimal risk profile of the original change.

Signed-off-by: shishuiwuhen2009
Signed-off-by: Mark Pollack <[email protected]>

Auto-cherry-pick to 1.0.x
Fixes #4192
1 parent c9ad1c7 commit 35486e9

File tree

2 files changed

+119
-2
lines changed

2 files changed

+119
-2
lines changed

mcp/common/src/main/java/org/springframework/ai/mcp/McpToolUtils.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,10 @@ public static String prefixedToolName(String prefix, String toolName) {
8080
String input = prefix + "_" + toolName;
8181

8282
// Replace any character that isn't alphanumeric, underscore, or hyphen with
83-
// concatenation
84-
String formatted = input.replaceAll("[^a-zA-Z0-9_-]", "");
83+
// concatenation. Support Han script + CJK blocks for complete Chinese character
84+
// coverage
85+
String formatted = input
86+
.replaceAll("[^\\p{IsHan}\\p{InCJK_Unified_Ideographs}\\p{InCJK_Compatibility_Ideographs}a-zA-Z0-9_-]", "");
8587

8688
formatted = formatted.replaceAll("-", "_");
8789

mcp/common/src/test/java/org/springframework/ai/mcp/ToolUtilsTests.java

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,121 @@ void prefixedToolNameShouldThrowExceptionForNullOrEmptyInputs() {
9393
.hasMessageContaining("Prefix or toolName cannot be null or empty");
9494
}
9595

96+
@Test
97+
void prefixedToolNameShouldSupportChineseCharacters() {
98+
String result = McpToolUtils.prefixedToolName("前缀", "工具名称");
99+
assertThat(result).isEqualTo("前缀_工具名称");
100+
}
101+
102+
@Test
103+
void prefixedToolNameShouldSupportMixedChineseAndEnglish() {
104+
String result = McpToolUtils.prefixedToolName("prefix前缀", "tool工具Name");
105+
assertThat(result).isEqualTo("prefix前缀_tool工具Name");
106+
}
107+
108+
@Test
109+
void prefixedToolNameShouldRemoveSpecialCharactersButKeepChinese() {
110+
String result = McpToolUtils.prefixedToolName("pre@fix前缀", "tool#工具$name");
111+
assertThat(result).isEqualTo("prefix前缀_tool工具name");
112+
}
113+
114+
@Test
115+
void prefixedToolNameShouldHandleChineseWithHyphens() {
116+
String result = McpToolUtils.prefixedToolName("前缀-test", "工具-name");
117+
assertThat(result).isEqualTo("前缀_test_工具_name");
118+
}
119+
120+
@Test
121+
void prefixedToolNameShouldTruncateLongChineseStrings() {
122+
// Create a string with Chinese characters that exceeds 64 characters
123+
String longPrefix = "前缀".repeat(20); // 40 Chinese characters
124+
String longToolName = "工具".repeat(20); // 40 Chinese characters
125+
String result = McpToolUtils.prefixedToolName(longPrefix, longToolName);
126+
assertThat(result).hasSize(64);
127+
assertThat(result).endsWith("_" + "工具".repeat(20));
128+
}
129+
130+
@Test
131+
void prefixedToolNameShouldHandleChinesePunctuation() {
132+
String result = McpToolUtils.prefixedToolName("前缀,测试", "工具。名称!");
133+
assertThat(result).isEqualTo("前缀测试_工具名称");
134+
}
135+
136+
@Test
137+
void prefixedToolNameShouldHandleUnicodeBoundaries() {
138+
// Test the endpoints of the legacy U+4E00..U+9FA5 range (a subset of the CJK Unified Ideographs block, which itself ends at U+9FFF)
139+
String result1 = McpToolUtils.prefixedToolName("prefix", "tool\u4e00"); // First
140+
// Chinese
141+
// character
142+
assertThat(result1).isEqualTo("prefix_tool\u4e00");
143+
144+
String result2 = McpToolUtils.prefixedToolName("prefix", "tool\u9fa5"); // Last
145+
// Chinese
146+
// character
147+
assertThat(result2).isEqualTo("prefix_tool\u9fa5");
148+
}
149+
150+
@Test
151+
void prefixedToolNameShouldExcludeNonChineseUnicodeCharacters() {
152+
// Test with Japanese Hiragana (outside Chinese range)
153+
String result1 = McpToolUtils.prefixedToolName("prefix", "toolあ"); // Japanese
154+
// Hiragana
155+
assertThat(result1).isEqualTo("prefix_tool");
156+
157+
// Test with Korean characters (outside Chinese range)
158+
String result2 = McpToolUtils.prefixedToolName("prefix", "tool한"); // Korean
159+
// character
160+
assertThat(result2).isEqualTo("prefix_tool");
161+
162+
// Test with Arabic characters (outside Chinese range)
163+
String result3 = McpToolUtils.prefixedToolName("prefix", "toolع"); // Arabic
164+
// character
165+
assertThat(result3).isEqualTo("prefix_tool");
166+
}
167+
168+
@Test
169+
void prefixedToolNameShouldHandleEmojisAndSymbols() {
170+
// Emojis and symbols should be removed
171+
String result = McpToolUtils.prefixedToolName("prefix🚀", "tool工具😀name");
172+
assertThat(result).isEqualTo("prefix_tool工具name");
173+
}
174+
175+
@Test
176+
void prefixedToolNameShouldPreserveNumbersWithChinese() {
177+
String result = McpToolUtils.prefixedToolName("前缀123", "工具456名称");
178+
assertThat(result).isEqualTo("前缀123_工具456名称");
179+
}
180+
181+
@Test
182+
void prefixedToolNameShouldSupportExtendedHanCharacters() {
183+
// Test boundary character at end of CJK Unified Ideographs block
184+
String result1 = McpToolUtils.prefixedToolName("prefix", "tool\u9fff"); // CJK
185+
// block
186+
// boundary
187+
assertThat(result1).isEqualTo("prefix_tool\u9fff");
188+
189+
// Test CJK Extension A characters
190+
String result2 = McpToolUtils.prefixedToolName("prefix", "tool\u3400"); // CJK Ext
191+
// A
192+
assertThat(result2).isEqualTo("prefix_tool\u3400");
193+
}
194+
195+
@Test
196+
void prefixedToolNameShouldSupportCompatibilityIdeographs() {
197+
// Test CJK Compatibility Ideographs
198+
String result = McpToolUtils.prefixedToolName("prefix", "tool\uf900"); // Compatibility
199+
// ideograph
200+
assertThat(result).isEqualTo("prefix_tool\uf900");
201+
}
202+
203+
@Test
204+
void prefixedToolNameShouldHandleAllHanScriptCharacters() {
205+
// Mix of different Han character blocks: Extension A + CJK Unified +
206+
// Compatibility
207+
String result = McpToolUtils.prefixedToolName("前缀\u3400", "工具\u9fff名称\uf900");
208+
assertThat(result).isEqualTo("前缀\u3400_工具\u9fff名称\uf900");
209+
}
210+
96211
@Test
97212
void constructorShouldBePrivate() throws Exception {
98213
Constructor<McpToolUtils> constructor = McpToolUtils.class.getDeclaredConstructor();

0 commit comments

Comments
 (0)