@@ -185,17 +185,12 @@ public static function platformNewLines(string $s): string
185
185
*/
186
186
public static function toAscii (string $ s ): string
187
187
{
188
- $ iconv = defined ('ICONV_IMPL ' ) ? trim (ICONV_IMPL , '" \'' ) : null ;
189
- static $ transliterator = null ;
190
- if ($ transliterator === null ) {
191
- if (class_exists ('Transliterator ' , false )) {
192
- $ transliterator = \Transliterator::create ('Any-Latin; Latin-ASCII ' );
193
- } else {
194
- trigger_error (__METHOD__ . "(): it is recommended to enable PHP extensions 'intl'. " , E_USER_NOTICE );
195
- $ transliterator = false ;
196
- }
188
+ if (!extension_loaded ('intl ' )) {
189
+ throw new Nette \NotSupportedException (__METHOD__ . '() requires INTL extension that is not loaded. ' );
197
190
}
198
191
192
+ $ iconv = defined ('ICONV_IMPL ' ) ? trim (ICONV_IMPL , '" \'' ) : null ;
193
+
199
194
// remove control characters and check UTF-8 validity
200
195
$ s = self ::pcre ('preg_replace ' , ['#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u ' , '' , $ s ]);
201
196
@@ -205,39 +200,15 @@ public static function toAscii(string $s): string
205
200
$ s = strtr ($ s , ["\u{AE}" => '(R) ' , "\u{A9}" => '(c) ' , "\u{2026}" => '... ' , "\u{AB}" => '<< ' , "\u{BB}" => '>> ' , "\u{A3}" => 'lb ' , "\u{A5}" => 'yen ' , "\u{B2}" => '^2 ' , "\u{B3}" => '^3 ' , "\u{B5}" => 'u ' , "\u{B9}" => '^1 ' , "\u{BA}" => 'o ' , "\u{BF}" => '? ' , "\u{2CA}" => "' " , "\u{2CD}" => '_ ' , "\u{2DD}" => '" ' , "\u{1FEF}" => '' , "\u{20AC}" => 'EUR ' , "\u{2122}" => 'TM ' , "\u{212E}" => 'e ' , "\u{2190}" => '<- ' , "\u{2191}" => '^ ' , "\u{2192}" => '-> ' , "\u{2193}" => 'V ' , "\u{2194}" => '<-> ' ]); // ® © … « » £ ¥ ² ³ µ ¹ º ¿ ˊ ˍ ˝ ` € ™ ℮ ← ↑ → ↓ ↔
206
201
}
207
202
208
- if ($ transliterator ) {
209
- $ s = $ transliterator ->transliterate ($ s );
210
- // use iconv because The transliterator leaves some characters out of ASCII, eg → ʾ
211
- if ($ iconv === 'glibc ' ) {
212
- $ s = strtr ($ s , '? ' , "\x01" ); // temporarily hide ? to distinguish them from the garbage that iconv creates
213
- $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
214
- $ s = str_replace (['? ' , "\x01" ], ['' , '? ' ], $ s ); // remove garbage and restore ? characters
215
- } elseif ($ iconv === 'libiconv ' ) {
216
- $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
217
- } else { // null or 'unknown' (#216)
218
- $ s = self ::pcre ('preg_replace ' , ['#[^\x00-\x7F]++# ' , '' , $ s ]); // remove non-ascii chars
219
- }
220
- } elseif ($ iconv === 'glibc ' || $ iconv === 'libiconv ' ) {
221
- // temporarily hide these characters to distinguish them from the garbage that iconv creates
222
- $ s = strtr ($ s , '` \'"^~? ' , "\x01\x02\x03\x04\x05\x06" );
223
- if ($ iconv === 'glibc ' ) {
224
- // glibc implementation is very limited. transliterate into Windows-1250 and then into ASCII, so most Eastern European characters are preserved
225
- $ s = iconv ('UTF-8 ' , 'WINDOWS-1250//TRANSLIT//IGNORE ' , $ s );
226
- $ s = strtr (
227
- $ s ,
228
- "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96\xa0\x8b\x97\x9b\xa6\xad\xb7" ,
229
- 'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-. ' ,
230
- );
231
- $ s = self ::pcre ('preg_replace ' , ['#[^\x00-\x7F]++# ' , '' , $ s ]);
232
- } else {
233
- $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
234
- }
235
-
236
- // remove garbage that iconv creates during transliteration (eg Ý -> Y')
237
- $ s = str_replace (['` ' , "' " , '" ' , '^ ' , '~ ' , '? ' ], '' , $ s );
238
- // restore temporarily hidden characters
239
- $ s = strtr ($ s , "\x01\x02\x03\x04\x05\x06" , '` \'"^~? ' );
240
- } else {
203
+ $ s = \Transliterator::create ('Any-Latin; Latin-ASCII ' )->transliterate ($ s );
204
+ // use iconv because the transliterator leaves some characters out of ASCII, e.g. → ʾ
205
+ if ($ iconv === 'glibc ' ) {
206
+ $ s = strtr ($ s , '? ' , "\x01" ); // temporarily hide ? to distinguish them from the garbage that iconv creates
207
+ $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
208
+ $ s = str_replace (['? ' , "\x01" ], ['' , '? ' ], $ s ); // remove garbage and restore ? characters
209
+ } elseif ($ iconv === 'libiconv ' ) {
210
+ $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT//IGNORE ' , $ s );
211
+ } else { // null or 'unknown' (#216)
241
212
$ s = self ::pcre ('preg_replace ' , ['#[^\x00-\x7F]++# ' , '' , $ s ]); // remove non-ascii chars
242
213
}
243
214
0 commit comments