@@ -6,7 +6,7 @@ use crate::slice;
6
6
use crate :: str:: from_utf8_unchecked_mut;
7
7
use crate :: ub_checks:: assert_unsafe_precondition;
8
8
use crate :: unicode:: printable:: is_printable;
9
- use crate :: unicode:: { self , conversions} ;
9
+ use crate :: unicode:: { self , Case_Ignorable , conversions} ;
10
10
11
11
impl char {
12
12
/// The lowest valid code point a `char` can have, `'\0'`.
@@ -950,7 +950,11 @@ impl char {
950
950
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
951
951
#[ inline]
952
952
pub fn is_control ( self ) -> bool {
953
- unicode:: Cc ( self )
953
+ // According to
954
+ // https://www.unicode.org/policies/stability_policy.html#Property_Value,
955
+ // the set of codepoints in `Cc` will never change. So we can hard-code
956
+ // the patterns to match against instead of using a table.
957
+ matches ! ( self , '\0' ..='\x1f' | '\x7f' ..='\u{9f}' )
954
958
}
955
959
956
960
/// Returns `true` if this `char` has the `Grapheme_Extend` property.
@@ -965,7 +969,47 @@ impl char {
965
969
#[ must_use]
966
970
#[ inline]
967
971
pub ( crate ) fn is_grapheme_extended ( self ) -> bool {
968
- unicode:: Grapheme_Extend ( self )
972
+ !self . is_ascii ( ) && unicode:: Grapheme_Extend ( self )
973
+ }
974
+
975
+ /// Returns `true` if this `char` has the `Cased` derived property.
976
+ ///
977
+ /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
978
+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
979
+ ///
980
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
981
+ /// [ucd]: https://www.unicode.org/reports/tr44/
982
+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
983
+ #[ must_use]
984
+ #[ inline]
985
+ #[ doc( hidden) ]
986
+ #[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
987
+ pub fn is_cased ( self ) -> bool {
988
+ if self . is_ascii ( ) {
989
+ self . is_ascii_alphabetic ( )
990
+ } else {
991
+ unicode:: Lowercase ( self ) || unicode:: Uppercase ( self ) || unicode:: Lt ( self )
992
+ }
993
+ }
994
+
995
+ /// Returns `true` if this `char` has the `Case_Ignorable` property.
996
+ ///
997
+ /// `Case_Ignorable` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
998
+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
999
+ ///
1000
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1001
+ /// [ucd]: https://www.unicode.org/reports/tr44/
1002
+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1003
+ #[ must_use]
1004
+ #[ inline]
1005
+ #[ doc( hidden) ]
1006
+ #[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
1007
+ pub fn is_case_ignorable ( self ) -> bool {
1008
+ if self . is_ascii ( ) {
1009
+ matches ! ( self , '\'' | '.' | ':' | '^' | '`' )
1010
+ } else {
1011
+ Case_Ignorable ( self )
1012
+ }
969
1013
}
970
1014
971
1015
/// Returns `true` if this `char` has one of the general categories for numbers.
0 commit comments