@@ -268,7 +268,7 @@ macro_rules! impl_Display {
268268 // Format per two digits from the lookup table.
269269 if remain > 9 {
270270 // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
271- // and the while condition ensures at least 2 more decimals.
271+ // and the if condition ensures at least 2 more decimals.
272272 unsafe { core:: hint:: assert_unchecked( offset >= 2 ) }
273273 // SAFETY: The offset counts down from its initial buf.len()
274274 // without underflow due to the previous precondition.
@@ -555,93 +555,6 @@ mod imp {
555555}
556556impl_Exp ! ( i128 , u128 as u128 via to_u128 named exp_u128) ;
557557
558- /// Helper function for writing a u64 into `buf` going from last to first, with `curr`.
559- fn parse_u64_into < const N : usize > ( mut n : u64 , buf : & mut [ MaybeUninit < u8 > ; N ] , curr : & mut usize ) {
560- let buf_ptr = MaybeUninit :: slice_as_mut_ptr ( buf) ;
561- let lut_ptr = DEC_DIGITS_LUT . as_ptr ( ) ;
562- assert ! ( * curr > 19 ) ;
563-
564- // SAFETY:
565- // Writes at most 19 characters into the buffer. Guaranteed that any ptr into LUT is at most
566- // 198, so will never OOB. There is a check above that there are at least 19 characters
567- // remaining.
568- unsafe {
569- if n >= 1e16 as u64 {
570- let to_parse = n % 1e16 as u64 ;
571- n /= 1e16 as u64 ;
572-
573- // Some of these are nops but it looks more elegant this way.
574- let d1 = ( ( to_parse / 1e14 as u64 ) % 100 ) << 1 ;
575- let d2 = ( ( to_parse / 1e12 as u64 ) % 100 ) << 1 ;
576- let d3 = ( ( to_parse / 1e10 as u64 ) % 100 ) << 1 ;
577- let d4 = ( ( to_parse / 1e8 as u64 ) % 100 ) << 1 ;
578- let d5 = ( ( to_parse / 1e6 as u64 ) % 100 ) << 1 ;
579- let d6 = ( ( to_parse / 1e4 as u64 ) % 100 ) << 1 ;
580- let d7 = ( ( to_parse / 1e2 as u64 ) % 100 ) << 1 ;
581- let d8 = ( ( to_parse / 1e0 as u64 ) % 100 ) << 1 ;
582-
583- * curr -= 16 ;
584-
585- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr + 0 ) , 2 ) ;
586- ptr:: copy_nonoverlapping ( lut_ptr. add ( d2 as usize ) , buf_ptr. add ( * curr + 2 ) , 2 ) ;
587- ptr:: copy_nonoverlapping ( lut_ptr. add ( d3 as usize ) , buf_ptr. add ( * curr + 4 ) , 2 ) ;
588- ptr:: copy_nonoverlapping ( lut_ptr. add ( d4 as usize ) , buf_ptr. add ( * curr + 6 ) , 2 ) ;
589- ptr:: copy_nonoverlapping ( lut_ptr. add ( d5 as usize ) , buf_ptr. add ( * curr + 8 ) , 2 ) ;
590- ptr:: copy_nonoverlapping ( lut_ptr. add ( d6 as usize ) , buf_ptr. add ( * curr + 10 ) , 2 ) ;
591- ptr:: copy_nonoverlapping ( lut_ptr. add ( d7 as usize ) , buf_ptr. add ( * curr + 12 ) , 2 ) ;
592- ptr:: copy_nonoverlapping ( lut_ptr. add ( d8 as usize ) , buf_ptr. add ( * curr + 14 ) , 2 ) ;
593- }
594- if n >= 1e8 as u64 {
595- let to_parse = n % 1e8 as u64 ;
596- n /= 1e8 as u64 ;
597-
598- // Some of these are nops but it looks more elegant this way.
599- let d1 = ( ( to_parse / 1e6 as u64 ) % 100 ) << 1 ;
600- let d2 = ( ( to_parse / 1e4 as u64 ) % 100 ) << 1 ;
601- let d3 = ( ( to_parse / 1e2 as u64 ) % 100 ) << 1 ;
602- let d4 = ( ( to_parse / 1e0 as u64 ) % 100 ) << 1 ;
603- * curr -= 8 ;
604-
605- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr + 0 ) , 2 ) ;
606- ptr:: copy_nonoverlapping ( lut_ptr. add ( d2 as usize ) , buf_ptr. add ( * curr + 2 ) , 2 ) ;
607- ptr:: copy_nonoverlapping ( lut_ptr. add ( d3 as usize ) , buf_ptr. add ( * curr + 4 ) , 2 ) ;
608- ptr:: copy_nonoverlapping ( lut_ptr. add ( d4 as usize ) , buf_ptr. add ( * curr + 6 ) , 2 ) ;
609- }
610- // `n` < 1e8 < (1 << 32)
611- let mut n = n as u32 ;
612- if n >= 1e4 as u32 {
613- let to_parse = n % 1e4 as u32 ;
614- n /= 1e4 as u32 ;
615-
616- let d1 = ( to_parse / 100 ) << 1 ;
617- let d2 = ( to_parse % 100 ) << 1 ;
618- * curr -= 4 ;
619-
620- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr + 0 ) , 2 ) ;
621- ptr:: copy_nonoverlapping ( lut_ptr. add ( d2 as usize ) , buf_ptr. add ( * curr + 2 ) , 2 ) ;
622- }
623-
624- // `n` < 1e4 < (1 << 16)
625- let mut n = n as u16 ;
626- if n >= 100 {
627- let d1 = ( n % 100 ) << 1 ;
628- n /= 100 ;
629- * curr -= 2 ;
630- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr) , 2 ) ;
631- }
632-
633- // decode last 1 or 2 chars
634- if n < 10 {
635- * curr -= 1 ;
636- * buf_ptr. add ( * curr) = ( n as u8 ) + b'0' ;
637- } else {
638- let d1 = n << 1 ;
639- * curr -= 2 ;
640- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr) , 2 ) ;
641- }
642- }
643- }
644-
645558#[ stable( feature = "rust1" , since = "1.0.0" ) ]
646559impl fmt:: Display for u128 {
647560 fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
@@ -652,90 +565,155 @@ impl fmt::Display for u128 {
652565#[ stable( feature = "rust1" , since = "1.0.0" ) ]
653566impl fmt:: Display for i128 {
654567 fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
655- let is_nonnegative = * self >= 0 ;
656- let n = if is_nonnegative {
657- self . to_u128 ( )
658- } else {
659- // convert the negative num to positive by summing 1 to its 2s complement
660- ( !self . to_u128 ( ) ) . wrapping_add ( 1 )
661- } ;
662- fmt_u128 ( n, is_nonnegative, f)
568+ fmt_u128 ( self . unsigned_abs ( ) , * self >= 0 , f)
663569 }
664570}
665571
666- /// Specialized optimization for u128. Instead of taking two items at a time, it splits
667- /// into at most 2 u64s, and then chunks by 1e16, 1e8, 1e4, 1e2, and then 1e1.
668- /// It also has to handle 1 last item, as 10^40 > 2^128 > 10^39, whereas
669- /// 10^20 > 2^64 > 10^19.
572+ /// Format optimized for u128. Computation of 128 bits is limited by processing
573+ /// in batches of 16 decimals at a time.
670574fn fmt_u128 ( n : u128 , is_nonnegative : bool , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
671- // 2^128 is about 3*10^38, so 39 gives an extra byte of space
672- let mut buf = [ MaybeUninit :: < u8 > :: uninit ( ) ; 39 ] ;
673- let mut curr = buf. len ( ) ;
674-
675- let ( n, rem) = udiv_1e19 ( n) ;
676- parse_u64_into ( rem, & mut buf, & mut curr) ;
677-
678- if n != 0 {
679- // 0 pad up to point
680- let target = buf. len ( ) - 19 ;
681- // SAFETY: Guaranteed that we wrote at most 19 bytes, and there must be space
682- // remaining since it has length 39
683- unsafe {
684- ptr:: write_bytes (
685- MaybeUninit :: slice_as_mut_ptr ( & mut buf) . add ( target) ,
686- b'0' ,
687- curr - target,
688- ) ;
689- }
690- curr = target;
691-
692- let ( n, rem) = udiv_1e19 ( n) ;
693- parse_u64_into ( rem, & mut buf, & mut curr) ;
694- // Should this following branch be annotated with unlikely?
695- if n != 0 {
696- let target = buf. len ( ) - 38 ;
697- // The raw `buf_ptr` pointer is only valid until `buf` is used the next time,
698- // but `buf` is not used in this scope so we are good.
699- let buf_ptr = MaybeUninit :: slice_as_mut_ptr ( & mut buf) ;
700- // SAFETY: At this point we wrote at most 38 bytes, pad up to that point,
701- // There can only be at most 1 digit remaining.
702- unsafe {
703- ptr:: write_bytes ( buf_ptr. add ( target) , b'0' , curr - target) ;
704- curr = target - 1 ;
705- * buf_ptr. add ( curr) = ( n as u8 ) + b'0' ;
706- }
575+ // Optimize common-case zero, which would also need special treatment due to
576+ // its "leading" zero.
577+ if n == 0 {
578+ return f. pad_integral ( true , "" , "0" ) ;
579+ }
580+
581+ // u128::MAX has 39 significant decimals.
582+ const MAX_DEC_N : usize = 39 ;
583+ // Buffer decimals with right alignment.
584+ let mut buf = [ MaybeUninit :: < u8 > :: uninit ( ) ; MAX_DEC_N ] ;
585+ // Count the number of bytes in buf that are not initialized.
586+ let mut offset = buf. len ( ) ;
587+
588+ // Take the 16 least-significant decimals.
589+ let ( n, mod_1e16) = div_rem_1e16 ( n) ;
590+ let mut remain = if n == 0 {
591+ mod_1e16
592+ } else {
593+ // write buf[23..39]
594+ enc_16lsd :: < 23 > ( & mut buf, mod_1e16) ;
595+ offset = 23 ;
596+
597+ // Take another 16 decimals.
598+ let ( n, mod_1e16) = div_rem_1e16 ( n) ;
599+ if n == 0 {
600+ mod_1e16
601+ } else {
602+ // write buf[7..23]
603+ enc_16lsd :: < 7 > ( & mut buf, mod_1e16) ;
604+ offset = 7 ;
605+
606+ debug_assert ! ( n < 10 ) ;
607+ n as u64
707608 }
609+ } ;
610+
611+ // Format per four digits from the lookup table.
612+ while remain > 999 {
613+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
614+ // and the while condition ensures at least 4 more decimals.
615+ unsafe { core:: hint:: assert_unchecked ( offset >= 4 ) }
616+ // SAFETY: The offset counts down from its initial buf.len()
617+ // without underflow due to the previous precondition.
618+ unsafe { core:: hint:: assert_unchecked ( offset <= buf. len ( ) ) }
619+ offset -= 4 ;
620+
621+ // pull two pairs
622+ let quad = remain % 1_00_00 ;
623+ remain /= 1_00_00 ;
624+ let pair1 = ( quad / 100 ) as usize ;
625+ let pair2 = ( quad % 100 ) as usize ;
626+ buf[ offset + 0 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 0 ] ) ;
627+ buf[ offset + 1 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 1 ] ) ;
628+ buf[ offset + 2 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 0 ] ) ;
629+ buf[ offset + 3 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 1 ] ) ;
630+ }
631+
632+ // Format per two digits from the lookup table.
633+ if remain > 9 {
634+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
635+ // and the if condition ensures at least 2 more decimals.
636+ unsafe { core:: hint:: assert_unchecked ( offset >= 2 ) }
637+ // SAFETY: The offset counts down from its initial buf.len()
638+ // without underflow due to the previous precondition.
639+ unsafe { core:: hint:: assert_unchecked ( offset <= buf. len ( ) ) }
640+ offset -= 2 ;
641+
642+ let pair = ( remain % 100 ) as usize ;
643+ remain /= 100 ;
644+ buf[ offset + 0 ] . write ( DEC_DIGITS_LUT [ pair * 2 + 0 ] ) ;
645+ buf[ offset + 1 ] . write ( DEC_DIGITS_LUT [ pair * 2 + 1 ] ) ;
708646 }
709647
710- // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid
711- // UTF-8 since `DEC_DIGITS_LUT` is
712- let buf_slice = unsafe {
648+ // Format the last remaining digit, if any.
649+ if remain != 0 {
650+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
651+ // and the if condition ensures (at least) 1 more decimal.
652+ unsafe { core:: hint:: assert_unchecked ( offset >= 1 ) }
653+ // SAFETY: The offset counts down from its initial buf.len()
654+ // without underflow due to the previous precondition.
655+ unsafe { core:: hint:: assert_unchecked ( offset <= buf. len ( ) ) }
656+ offset -= 1 ;
657+
658+ // Either the compiler sees that remain < 10, or the `& 15` mask below
659+ // bounds the index, which prevents a boundary check up next.
660+ let last = ( remain & 15 ) as usize ;
661+ buf[ offset] . write ( DEC_DIGITS_LUT [ last * 2 + 1 ] ) ;
662+ // not used: remain = 0;
663+ }
664+
665+ // SAFETY: All buf content since offset is set.
666+ let written = unsafe { buf. get_unchecked ( offset..) } ;
667+ // SAFETY: Writes use ASCII from the lookup table exclusively.
668+ let as_str = unsafe {
713669 str:: from_utf8_unchecked ( slice:: from_raw_parts (
714- MaybeUninit :: slice_as_mut_ptr ( & mut buf ) . add ( curr ) ,
715- buf . len ( ) - curr ,
670+ MaybeUninit :: slice_as_ptr ( written ) ,
671+ written . len ( ) ,
716672 ) )
717673 } ;
718- f. pad_integral ( is_nonnegative, "" , buf_slice)
674+ f. pad_integral ( is_nonnegative, "" , as_str)
675+ }
676+
677+ /// Encodes the 16 least significant decimals of n into buf.
678+ fn enc_16lsd < const OFFSET : usize > ( buf : & mut [ MaybeUninit < u8 > ; 39 ] , n : u64 ) {
679+ // Consume the least-significant decimals from a working copy.
680+ let mut remain = n;
681+
682+ // Format per four digits from the lookup table.
683+ for quad_index in ( 0 ..4 ) . rev ( ) {
684+ // pull two pairs
685+ let quad = remain % 1_00_00 ;
686+ remain /= 1_00_00 ;
687+ let pair1 = ( quad / 100 ) as usize ;
688+ let pair2 = ( quad % 100 ) as usize ;
689+ buf[ quad_index * 4 + OFFSET + 0 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 0 ] ) ;
690+ buf[ quad_index * 4 + OFFSET + 1 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 1 ] ) ;
691+ buf[ quad_index * 4 + OFFSET + 2 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 0 ] ) ;
692+ buf[ quad_index * 4 + OFFSET + 3 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 1 ] ) ;
693+ }
719694}
720695
721- /// Partition of `n` into a quotient `n / 1e19` and a remainder `rem < 1e19`
696+ /// Euclidean division plus remainder with constant 1E16 basically consumes 16
697+ /// decimals from n.
722698///
723- /// Integer division algorithm is based on the following paper:
699+ /// The integer division algorithm is based on the following paper:
724700///
725701/// T. Granlund and P. Montgomery, “Division by Invariant Integers Using Multiplication”
726702/// in Proc. of the SIGPLAN94 Conference on Programming Language Design and
727703/// Implementation, 1994, pp. 61–72
728704///
729- fn udiv_1e19 ( n : u128 ) -> ( u128 , u64 ) {
730- const DIV : u64 = 1e19 as u64 ;
731- const FACTOR : u128 = 156927543384667019095894735580191660403 ;
705+ #[ inline]
706+ fn div_rem_1e16 ( n : u128 ) -> ( u128 , u64 ) {
707+ const D : u128 = 1_0000_0000_0000_0000 ;
708+ if n < D {
709+ return ( 0 , n as u64 ) ;
710+ }
732711
733- let quot = if n < 1 << 83 {
734- ( ( n >> 19 ) as u64 / ( DIV >> 19 ) ) as u128
735- } else {
736- n. widening_mul ( FACTOR ) . 1 >> 62
737- } ;
712+ // These constant values are computed with the CHOOSE_MULTIPLIER procedure.
713+ const M_HIGH : u128 = 76624777043294442917917351357515459181 ;
714+ const SH_POST : u8 = 51 ;
738715
739- let rem = ( n - quot * DIV as u128 ) as u64 ;
740- ( quot, rem)
716+ let quot = n. widening_mul ( M_HIGH ) . 1 >> SH_POST ;
717+ let rem = n - quot * D ;
718+ ( quot, rem as u64 )
741719}
0 commit comments