Skip to content

Commit f38a458

Browse files
committed
Strings::webalize() requires INTL extension
1 parent e9e80aa commit f38a458

File tree

1 file changed

+13
-42
lines changed

1 file changed

+13
-42
lines changed

src/Utils/Strings.php

Lines changed: 13 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -185,17 +185,12 @@ public static function platformNewLines(string $s): string
185185
*/
186186
public static function toAscii(string $s): string
187187
{
188-
$iconv = defined('ICONV_IMPL') ? trim(ICONV_IMPL, '"\'') : null;
189-
static $transliterator = null;
190-
if ($transliterator === null) {
191-
if (class_exists('Transliterator', false)) {
192-
$transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
193-
} else {
194-
trigger_error(__METHOD__ . "(): it is recommended to enable PHP extensions 'intl'.", E_USER_NOTICE);
195-
$transliterator = false;
196-
}
188+
if (!extension_loaded('intl')) {
189+
throw new Nette\NotSupportedException(__METHOD__ . '() requires INTL extension that is not loaded.');
197190
}
198191

192+
$iconv = defined('ICONV_IMPL') ? trim(ICONV_IMPL, '"\'') : null;
193+
199194
// remove control characters and check UTF-8 validity
200195
$s = self::pcre('preg_replace', ['#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s]);
201196

@@ -205,39 +200,15 @@ public static function toAscii(string $s): string
205200
$s = strtr($s, ["\u{AE}" => '(R)', "\u{A9}" => '(c)', "\u{2026}" => '...', "\u{AB}" => '<<', "\u{BB}" => '>>', "\u{A3}" => 'lb', "\u{A5}" => 'yen', "\u{B2}" => '^2', "\u{B3}" => '^3', "\u{B5}" => 'u', "\u{B9}" => '^1', "\u{BA}" => 'o', "\u{BF}" => '?', "\u{2CA}" => "'", "\u{2CD}" => '_', "\u{2DD}" => '"', "\u{1FEF}" => '', "\u{20AC}" => 'EUR', "\u{2122}" => 'TM', "\u{212E}" => 'e', "\u{2190}" => '<-', "\u{2191}" => '^', "\u{2192}" => '->', "\u{2193}" => 'V', "\u{2194}" => '<->']); // ® © … « » £ ¥ ² ³ µ ¹ º ¿ ˊ ˍ ˝ ` € ™ ℮ ← ↑ → ↓ ↔
206201
}
207202

208-
if ($transliterator) {
209-
$s = $transliterator->transliterate($s);
210-
// use iconv because the transliterator leaves some characters out of ASCII, e.g. → ʾ
211-
if ($iconv === 'glibc') {
212-
$s = strtr($s, '?', "\x01"); // temporarily hide ? to distinguish them from the garbage that iconv creates
213-
$s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
214-
$s = str_replace(['?', "\x01"], ['', '?'], $s); // remove garbage and restore ? characters
215-
} elseif ($iconv === 'libiconv') {
216-
$s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
217-
} else { // null or 'unknown' (#216)
218-
$s = self::pcre('preg_replace', ['#[^\x00-\x7F]++#', '', $s]); // remove non-ascii chars
219-
}
220-
} elseif ($iconv === 'glibc' || $iconv === 'libiconv') {
221-
// temporarily hide these characters to distinguish them from the garbage that iconv creates
222-
$s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");
223-
if ($iconv === 'glibc') {
224-
// glibc implementation is very limited. transliterate into Windows-1250 and then into ASCII, so most Eastern European characters are preserved
225-
$s = iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);
226-
$s = strtr(
227-
$s,
228-
"\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96\xa0\x8b\x97\x9b\xa6\xad\xb7",
229-
'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.',
230-
);
231-
$s = self::pcre('preg_replace', ['#[^\x00-\x7F]++#', '', $s]);
232-
} else {
233-
$s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
234-
}
235-
236-
// remove garbage that iconv creates during transliteration (eg Ý -> Y')
237-
$s = str_replace(['`', "'", '"', '^', '~', '?'], '', $s);
238-
// restore temporarily hidden characters
239-
$s = strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?');
240-
} else {
203+
$s = \Transliterator::create('Any-Latin; Latin-ASCII')->transliterate($s);
204+
// use iconv because the transliterator leaves some characters out of ASCII, e.g. → ʾ
205+
if ($iconv === 'glibc') {
206+
$s = strtr($s, '?', "\x01"); // temporarily hide ? to distinguish them from the garbage that iconv creates
207+
$s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
208+
$s = str_replace(['?', "\x01"], ['', '?'], $s); // remove garbage and restore ? characters
209+
} elseif ($iconv === 'libiconv') {
210+
$s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
211+
} else { // null or 'unknown' (#216)
241212
$s = self::pcre('preg_replace', ['#[^\x00-\x7F]++#', '', $s]); // remove non-ascii chars
242213
}
243214

0 commit comments

Comments
 (0)