Skip to content

Commit 123f36d

Browse files
authored
Remove extra llm changes
1 parent a1afc11 commit 123f36d

File tree

1 file changed

+42
-185
lines changed

1 file changed

+42
-185
lines changed

components/DataLiberation/URL/class-cssprocessor.php

Lines changed: 42 additions & 185 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
namespace WordPress\DataLiberation\URL;
44

5-
use function WordPress\Encoding\_wp_scan_utf8;
6-
use function WordPress\Encoding\_wp_scrub_utf8_fallback;
75
use function WordPress\Encoding\utf8_codepoint_at;
86
use function WordPress\Encoding\codepoint_to_utf8_bytes;
7+
use function WordPress\Encoding\compat\_wp_scan_utf8;
8+
use function WordPress\Encoding\wp_scrub_utf8;
99

1010
/**
1111
* Tokenizes CSS according to the CSS Syntax Level 3 specification.
@@ -742,6 +742,32 @@ public function get_token_value() {
742742
return $this->token_value;
743743
}
744744

745+
/**
746+
* Determines whether the current token is a data URI.
747+
*
748+
* Only meaningful for URL and STRING tokens. Returns false for all other token types.
749+
*
750+
* @return bool Whether the current token value starts with "data:" (case-insensitive).
751+
*/
752+
public function is_data_uri(): bool {
753+
if ( null === $this->token_value_starts_at || null === $this->token_value_length ) {
754+
return false;
755+
}
756+
757+
if ( $this->token_value_length < 5 ) {
758+
return false;
759+
}
760+
761+
$offset = $this->token_value_starts_at;
762+
return (
763+
( 'd' === $this->css[ $offset ] || 'D' === $this->css[ $offset ] ) &&
764+
( 'a' === $this->css[ $offset + 1 ] || 'A' === $this->css[ $offset + 1 ] ) &&
765+
( 't' === $this->css[ $offset + 2 ] || 'T' === $this->css[ $offset + 2 ] ) &&
766+
( 'a' === $this->css[ $offset + 3 ] || 'A' === $this->css[ $offset + 3 ] ) &&
767+
':' === $this->css[ $offset + 4 ]
768+
);
769+
}
770+
745771
/**
746772
* Gets the token start at.
747773
*
@@ -788,40 +814,13 @@ public function get_token_value_length(): ?int {
788814
}
789815

790816
/**
791-
* Determines whether the current token is a data URI.
792-
*
793-
* Only meaningful for URL and STRING tokens. Returns false for all other token types.
794-
*
795-
* @return bool Whether the current token value starts with "data:" (case-insensitive).
796-
*/
797-
public function is_data_uri(): bool {
798-
if ( null === $this->token_value_starts_at || null === $this->token_value_length ) {
799-
return false;
800-
}
801-
802-
if ( $this->token_value_length < 5 ) {
803-
return false;
804-
}
805-
806-
$offset = $this->token_value_starts_at;
807-
return (
808-
( 'd' === $this->css[ $offset ] || 'D' === $this->css[ $offset ] ) &&
809-
( 'a' === $this->css[ $offset + 1 ] || 'A' === $this->css[ $offset + 1 ] ) &&
810-
( 't' === $this->css[ $offset + 2 ] || 'T' === $this->css[ $offset + 2 ] ) &&
811-
( 'a' === $this->css[ $offset + 3 ] || 'A' === $this->css[ $offset + 3 ] ) &&
812-
':' === $this->css[ $offset + 4 ]
813-
);
814-
}
815-
816-
/**
817-
* Sets the value of the current token.
817+
* Sets the value of the current URL token.
818818
*
819-
* This method allows modifying URL or STRING token values. The new value
820-
* will be properly escaped according to CSS syntax rules.
819+
* This method allows modifying the URL value in url() tokens. The new value
820+
* will be properly escaped according to CSS URL syntax rules.
821821
*
822-
* Supported token types:
823-
* - TOKEN_URL: URL value in url() tokens
824-
* - TOKEN_STRING: String value (properly quoted and escaped)
822+
* Currently only URL tokens are supported. Attempting to set the value on
823+
* other token types will return false.
825824
*
826825
* Example:
827826
*
@@ -835,32 +834,22 @@ public function is_data_uri(): bool {
835834
* echo $processor->get_updated_css();
836835
* // Outputs: background: url(new.jpg);
837836
*
838-
* @param string $new_value The new value (should not include url() wrapper or quotes).
837+
* @param string $new_value The new URL value (should not include url() wrapper).
839838
* @return bool Whether the value was successfully updated.
840839
*/
841840
public function set_token_value( string $new_value ): bool {
841+
// Only URL tokens are currently supported.
842+
if ( self::TOKEN_URL !== $this->token_type ) {
843+
return false;
844+
}
845+
842846
// Ensure we have valid token value boundaries.
843847
if ( null === $this->token_value_starts_at || null === $this->token_value_length ) {
844848
return false;
845849
}
846850

847-
$escaped_value = null;
848-
849-
switch ( $this->token_type ) {
850-
case self::TOKEN_URL:
851-
// Escape the URL value for quoted URL syntax.
852-
$escaped_value = $this->escape_url_value( $new_value );
853-
break;
854-
855-
case self::TOKEN_STRING:
856-
// Escape the string value for quoted string syntax.
857-
$escaped_value = $this->escape_string_value( $new_value );
858-
break;
859-
860-
default:
861-
// Unsupported token type.
862-
return false;
863-
}
851+
// Escape the URL value for unquoted URL syntax.
852+
$escaped_value = $this->escape_url_value( $new_value );
864853

865854
// Queue the lexical update.
866855
$this->lexical_updates[] = array(
@@ -935,56 +924,6 @@ private function escape_url_value( string $unescaped ): string {
935924
return '"' . $escaped . '"';
936925
}
937926

938-
/**
939-
* Escapes a string value for use in string token replacement.
940-
*
941-
* For STRING tokens, the value boundaries point to the content between quotes,
942-
* so we must NOT add quotes ourselves - they're already in the source.
943-
*
944-
* @param string $unescaped Unescaped string value.
945-
* @return string Escaped string value without surrounding quotes.
946-
*/
947-
private function escape_string_value( string $unescaped ): string {
948-
$escaped = '';
949-
$at = 0;
950-
while ( $at < strlen( $unescaped ) ) {
951-
$safe_len = strcspn( $unescaped, "\n\r\f\\\"", $at );
952-
if ( $safe_len > 0 ) {
953-
$escaped .= substr( $unescaped, $at, $safe_len );
954-
$at += $safe_len;
955-
continue;
956-
}
957-
958-
$unsafe_char = $unescaped[ $at ];
959-
switch ( $unsafe_char ) {
960-
case "\r":
961-
++$at;
962-
$escaped .= '\\a ';
963-
if ( strlen( $unescaped ) > $at + 1 && "\n" === $unescaped[ $at + 1 ] ) {
964-
++$at;
965-
}
966-
break;
967-
case "\f":
968-
case "\n":
969-
++$at;
970-
$escaped .= '\\a ';
971-
break;
972-
case '\\':
973-
++$at;
974-
$escaped .= '\\5C ';
975-
break;
976-
case '"':
977-
++$at;
978-
$escaped .= '\\22 ';
979-
break;
980-
default:
981-
_doing_it_wrong( __METHOD__, 'Unexpected character in string value: ' . $unsafe_char, '1.0.0' );
982-
break;
983-
}
984-
}
985-
return $escaped;
986-
}
987-
988927
/**
989928
* Returns the CSS with all modifications applied.
990929
*
@@ -1615,7 +1554,7 @@ private function consume_ident_start_codepoint( $at ): int {
16151554
*/
16161555
private function decode_string_or_url( int $start, int $length ): string {
16171556
// Fast path: check if any processing is needed.
1618-
$slice = _wp_scrub_utf8_fallback( substr( $this->css, $start, $length ) );
1557+
$slice = wp_scrub_utf8( substr( $this->css, $start, $length ) );
16191558
$special_chars = "\\\r\f\x00";
16201559
if ( false === strpbrk( $slice, $special_chars ) ) {
16211560
// No special chars - return raw substring (almost zero allocations).
@@ -1885,86 +1824,4 @@ private function check_if_3_code_points_start_an_ident_sequence( int $offset ):
18851824

18861825
return $this->consume_ident_start_codepoint( $offset ) > 0 || $this->is_valid_escape( $offset );
18871826
}
1888-
1889-
/**
1890-
* Decodes CSS escape sequences in a string.
1891-
*
1892-
* This is a utility method that can be used by other classes to decode
1893-
* CSS escapes in extracted values. It implements the same logic as the
1894-
* incremental escape parsing done during tokenization.
1895-
*
1896-
* Handles:
1897-
* - Hex escapes: \20 (space), \1F600 (emoji), up to 6 hex digits
1898-
* - Character escapes: \(, \), \", \', \\
1899-
* - Whitespace after hex escapes (single whitespace consumed)
1900-
* - Escaped newlines (consumed, not included in output)
1901-
*
1902-
* @see https://www.w3.org/TR/css-syntax-3/#consume-escaped-code-point
1903-
*
1904-
* @param string $value Encoded string with CSS escapes.
1905-
* @return string Decoded string with escapes resolved to their actual characters.
1906-
*/
1907-
public static function decode_css_escapes( string $value ): string {
1908-
$length = strlen( $value );
1909-
$result = '';
1910-
$at = 0;
1911-
1912-
while ( $at < $length ) {
1913-
$span = strcspn( $value, '\\', $at );
1914-
if ( $span > 0 ) {
1915-
$result .= substr( $value, $at, $span );
1916-
$at += $span;
1917-
}
1918-
1919-
if ( $at >= $length ) {
1920-
break;
1921-
}
1922-
1923-
++$at;
1924-
if ( $at >= $length ) {
1925-
break;
1926-
}
1927-
1928-
$hex_len = strspn( $value, '0123456789abcdefABCDEF', $at );
1929-
if ( $hex_len > 6 ) {
1930-
$hex_len = 6;
1931-
}
1932-
1933-
if ( $hex_len > 0 ) {
1934-
$hex = substr( $value, $at, $hex_len );
1935-
$result .= codepoint_to_utf8_bytes( hexdec( $hex ) );
1936-
$at += $hex_len;
1937-
1938-
$ws_len = strspn( $value, " \n\r\t\f", $at );
1939-
if ( $ws_len > 0 ) {
1940-
if ( $at + 1 < $length && "\r" === $value[ $at ] && "\n" === $value[ $at + 1 ] ) {
1941-
$at += 2;
1942-
} else {
1943-
$at += 1;
1944-
}
1945-
}
1946-
continue;
1947-
}
1948-
1949-
$next = $value[ $at ];
1950-
1951-
if ( "\n" === $next || "\f" === $next ) {
1952-
++$at;
1953-
continue;
1954-
}
1955-
1956-
if ( "\r" === $next ) {
1957-
++$at;
1958-
if ( $at < $length && "\n" === $value[ $at ] ) {
1959-
++$at;
1960-
}
1961-
continue;
1962-
}
1963-
1964-
$result .= $next;
1965-
++$at;
1966-
}
1967-
1968-
return $result;
1969-
}
19701827
}

0 commit comments

Comments
 (0)