@@ -47,95 +47,109 @@ fn adc(carry: u8, lhs: BigDigit, rhs: BigDigit, out: &mut BigDigit) -> u8 {
4747 u8:: from ( b || d)
4848}
4949
50- /// Performs a part of the addition. Returns a tuple containing the carry state
51- /// and the number of integers that were added
52- ///
53- /// By using as many registers as possible, we treat digits 5 by 5
5450#[ cfg( target_arch = "x86_64" ) ]
55- unsafe fn schoolbook_add_assign_x86_64 (
56- lhs : * mut u64 ,
57- rhs : * const u64 ,
58- mut size : usize ,
59- ) -> ( bool , usize ) {
60- size /= 5 ;
61- if size == 0 {
62- return ( false , 0 ) ;
63- }
51+ cfg_64 ! (
52+ /// Performs a part of the addition. Returns a tuple containing the carry state
53+ /// and the number of integers that were added
54+ ///
55+ /// By using as many registers as possible, we treat digits 5 by 5
56+ unsafe fn schoolbook_add_assign_x86_64(
57+ lhs: * mut u64 ,
58+ rhs: * const u64 ,
59+ mut size: usize ,
60+ ) -> ( bool , usize ) {
61+ size /= 5 ;
62+ if size == 0 {
63+ return ( false , 0 ) ;
64+ }
6465
65- let mut c: u8 ;
66- let mut idx = 0 ;
67-
68- asm ! (
69- // Clear the carry flag
70- "clc" ,
71-
72- "3:" ,
73-
74- // Copy a in registers
75- "mov {a_tmp1}, qword ptr [{a} + 8*{idx}]" ,
76- "mov {a_tmp2}, qword ptr [{a} + 8*{idx} + 8]" ,
77- "mov {a_tmp3}, qword ptr [{a} + 8*{idx} + 16]" ,
78- "mov {a_tmp4}, qword ptr [{a} + 8*{idx} + 24]" ,
79- "mov {a_tmp5}, qword ptr [{a} + 8*{idx} + 32]" ,
80-
81- // Copy b in registers
82- "mov {b_tmp1}, qword ptr [{b} + 8*{idx}]" ,
83- "mov {b_tmp2}, qword ptr [{b} + 8*{idx} + 8]" ,
84- "mov {b_tmp3}, qword ptr [{b} + 8*{idx} + 16]" ,
85- "mov {b_tmp4}, qword ptr [{b} + 8*{idx} + 24]" ,
86- "mov {b_tmp5}, qword ptr [{b} + 8*{idx} + 32]" ,
87-
88- // Perform the addition
89- "adc {a_tmp1}, {b_tmp1}" ,
90- "adc {a_tmp2}, {b_tmp2}" ,
91- "adc {a_tmp3}, {b_tmp3}" ,
92- "adc {a_tmp4}, {b_tmp4}" ,
93- "adc {a_tmp5}, {b_tmp5}" ,
94-
95- // Copy the return values
96- "mov qword ptr [{a} + 8*{idx}], {a_tmp1}" ,
97- "mov qword ptr [{a} + 8*{idx} + 8], {a_tmp2}" ,
98- "mov qword ptr [{a} + 8*{idx} + 16], {a_tmp3}" ,
99- "mov qword ptr [{a} + 8*{idx} + 24], {a_tmp4}" ,
100- "mov qword ptr [{a} + 8*{idx} + 32], {a_tmp5}" ,
101-
102- // Increment loop counter
103- // `inc` and `dec` aren't modifying carry flag
104- "inc {idx}" ,
105- "inc {idx}" ,
106- "inc {idx}" ,
107- "inc {idx}" ,
108- "inc {idx}" ,
109- "dec {size}" ,
110- "jnz 3b" ,
111-
112- // Output carry flag and clear
113- "setc {c}" ,
114- "clc" ,
115-
116- size = in( reg) size,
117- a = in( reg) lhs,
118- b = in( reg) rhs,
119- c = lateout( reg_byte) c,
120- idx = inout( reg) idx,
121-
122- a_tmp1 = out( reg) _,
123- a_tmp2 = out( reg) _,
124- a_tmp3 = out( reg) _,
125- a_tmp4 = out( reg) _,
126- a_tmp5 = out( reg) _,
127-
128- b_tmp1 = out( reg) _,
129- b_tmp2 = out( reg) _,
130- b_tmp3 = out( reg) _,
131- b_tmp4 = out( reg) _,
132- b_tmp5 = out( reg) _,
133-
134- options( nostack) ,
135- ) ;
66+ let mut c: u8 ;
67+ let mut idx = 0 ;
68+
69+ asm!(
70+ // Clear the carry flag
71+ "clc" ,
72+
73+ "3:" ,
74+
75+ // Copy a in registers
76+ "mov {a_tmp1}, qword ptr [{a} + 8*{idx}]" ,
77+ "mov {a_tmp2}, qword ptr [{a} + 8*{idx} + 8]" ,
78+ "mov {a_tmp3}, qword ptr [{a} + 8*{idx} + 16]" ,
79+ "mov {a_tmp4}, qword ptr [{a} + 8*{idx} + 24]" ,
80+ "mov {a_tmp5}, qword ptr [{a} + 8*{idx} + 32]" ,
81+
82+ // Copy b in registers
83+ "mov {b_tmp1}, qword ptr [{b} + 8*{idx}]" ,
84+ "mov {b_tmp2}, qword ptr [{b} + 8*{idx} + 8]" ,
85+ "mov {b_tmp3}, qword ptr [{b} + 8*{idx} + 16]" ,
86+ "mov {b_tmp4}, qword ptr [{b} + 8*{idx} + 24]" ,
87+ "mov {b_tmp5}, qword ptr [{b} + 8*{idx} + 32]" ,
88+
89+ // Perform the addition
90+ "adc {a_tmp1}, {b_tmp1}" ,
91+ "adc {a_tmp2}, {b_tmp2}" ,
92+ "adc {a_tmp3}, {b_tmp3}" ,
93+ "adc {a_tmp4}, {b_tmp4}" ,
94+ "adc {a_tmp5}, {b_tmp5}" ,
95+
96+ // Copy the return values
97+ "mov qword ptr [{a} + 8*{idx}], {a_tmp1}" ,
98+ "mov qword ptr [{a} + 8*{idx} + 8], {a_tmp2}" ,
99+ "mov qword ptr [{a} + 8*{idx} + 16], {a_tmp3}" ,
100+ "mov qword ptr [{a} + 8*{idx} + 24], {a_tmp4}" ,
101+ "mov qword ptr [{a} + 8*{idx} + 32], {a_tmp5}" ,
102+
103+ // Increment loop counter
104+ // `inc` and `dec` aren't modifying carry flag
105+ "inc {idx}" ,
106+ "inc {idx}" ,
107+ "inc {idx}" ,
108+ "inc {idx}" ,
109+ "inc {idx}" ,
110+ "dec {size}" ,
111+ "jnz 3b" ,
112+
113+ // Output carry flag and clear
114+ "setc {c}" ,
115+ "clc" ,
116+
117+ size = in( reg) size,
118+ a = in( reg) lhs,
119+ b = in( reg) rhs,
120+ c = lateout( reg_byte) c,
121+ idx = inout( reg) idx,
122+
123+ a_tmp1 = out( reg) _,
124+ a_tmp2 = out( reg) _,
125+ a_tmp3 = out( reg) _,
126+ a_tmp4 = out( reg) _,
127+ a_tmp5 = out( reg) _,
128+
129+ b_tmp1 = out( reg) _,
130+ b_tmp2 = out( reg) _,
131+ b_tmp3 = out( reg) _,
132+ b_tmp4 = out( reg) _,
133+ b_tmp5 = out( reg) _,
134+
135+ options( nostack) ,
136+ ) ;
137+
138+ ( c > 0 , idx)
139+ }
140+ ) ;
136141
137- ( c > 0 , idx)
138- }
142+ #[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
143+ cfg_32 ! (
144+ /// TODO: The same trick as above can be applied to 32 bit targets
145+ unsafe fn schoolbook_add_assign_x86_64(
146+ _lhs: * mut u32 ,
147+ _rhs: * const u32 ,
148+ _size: usize ,
149+ ) -> ( bool , usize ) {
150+ ( false , 0 )
151+ }
152+ ) ;
139153
140154/// Two argument addition of raw slices, `a += b`, returning the carry.
141155///
@@ -149,10 +163,10 @@ pub(super) fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit {
149163
150164 let ( a_lo, a_hi) = a. split_at_mut ( b. len ( ) ) ;
151165
152- // On x86_64 machine, perform most of the addition via inline assembly
153- #[ cfg( target_arch = "x86_64" ) ]
166+ // On x86 and x86_64 machines, perform most of the addition via inline assembly
167+ #[ cfg( any ( target_arch = "x86" , target_arch = " x86_64") ) ]
154168 let ( c, done) = unsafe { schoolbook_add_assign_x86_64 ( a_lo. as_mut_ptr ( ) , b. as_ptr ( ) , b. len ( ) ) } ;
155- #[ cfg( not( target_arch = "x86_64" ) ) ]
169+ #[ cfg( not( any ( target_arch = "x86" , target_arch = " x86_64") ) ) ]
156170 let ( c, done) = ( false , 0 ) ;
157171
158172 let mut carry = c as u8 ;
0 commit comments