|
| 1 | +package drivers |
| 2 | + |
| 3 | +import ( |
| 4 | + "errors" |
| 5 | + "math" |
| 6 | +) |
| 7 | + |
| 8 | +const ( |
| 9 | + signBitmask = 0x80000000 // first bit |
| 10 | + expBitmask = 0xFF // next 8 bits, mask after shift is simpler to understand |
| 11 | + mantissaBits = 23 // remaining 23 bits |
| 12 | + mantissaBitsMask = 0x007FFFFF // mask sign and exponent bits |
| 13 | + bias = 127 // bias according IEEE754 specification for "binary32" |
| 14 | + maxAbsExponent = 30 // to keep safe below MaxInt32 without using "2^31-1" for calculation |
| 15 | + maxAbsExponentEncoded = maxAbsExponent + bias |
| 16 | +) |
| 17 | + |
| 18 | +// Float32Fractions calculates the numerator and denominator from the encoded bits of the float32 number, which |
| 19 | +// are placed as follows: s|hhh hhhh h|nnn nnnn nnnn nnnn nnnn nnnn |
| 20 | +// |
| 21 | +// sign S: 1 bit s, exponent-value E: 8 bits h (count bits r=8), mantis-value M: 23 bits n (count bits p=23) |
| 22 | +// |
| 23 | +// S = 1 for positive and -1 for negative values |
| 24 | +// |
| 25 | +// creating of real mantis "m" (1 =< m < 2) from M (0 =< (M/2^p) < 1; 0 =< M =< 2^p - 1): |
| 26 | +// 24 bits mantis = 1nnn nnnn nnnn nnnn nnnn nnnn |
| 27 | +// m = 1 + M/2^p; 2^p = 2^23 = 8388608 |
| 28 | +// |
| 29 | +// creating of real exponent "e" (-126 =< e =< 127) |
| 30 | +// e = E - B; B = 127 |
| 31 | +// |
| 32 | +// f = m * 2^e |
| 33 | +// |
| 34 | +// Target values can be calculated as follows: |
| 35 | +// num = (-1)^S * [2^k + M*2^(k-23)]; den = 2^(k-e) |
| 36 | +// |
| 37 | +// For fast calculation use the shift operation: y = a*2^x => y = (a<<x) ; "a" can be 1 |
| 38 | +// |
| 39 | +// If internally int32 is a concern, k must be chosen dynamically as follows: |
| 40 | +// k=30+e; if k>30; k=30 => means for f > 1 (e >= 0) => k=30, otherwise (e<0) => k=0..29 |
| 41 | +// |
| 42 | +// example 18.4: exp=4, M=1258291 |
| 43 | +// k=34 => k=30 |
| 44 | +// den = 2^(30-4) = 67108864 = 0x04000000 |
| 45 | +// num = 2^30 + M*2^(30-23) = 2^30 + M*2^7 = 1073741824 + 161061248 = 1234803072 0x49999980 |
| 46 | +// f = num/den = 18.3999996185 |
| 47 | +// |
| 48 | +// example 0.05: exp=-5, M=5033164 |
| 49 | +// k=25 |
| 50 | +// den = 2^(25--5) = 2^30 = 1073741824 = 0x40000000 |
| 51 | +// num = 2^25 + M*2^(25-23) = 2^25 + M*2^2 = 33554432 + 20132656 = 53687088 = 0x03333330 |
| 52 | +// f = num/den = 0.04999999702 |
| 53 | +// |
| 54 | +// If a bigger range for the internal calculation is used and encoded values until just before return are used, some |
| 55 | +// calculations and checks can be simplified/dropped. This means especially: |
| 56 | +// * k can be constant 30 |
| 57 | +// * a constant can be used for "2^30" instead of calculation from k |
| 58 | +// * a constant can be used for "2^(30-23)" instead of calculation from k |
| 59 | +// * no differentiation is needed at beginning regarding the sign of e |
| 60 | +// |
| 61 | +// valid range for int32 for e: -30 <= e <= 30 |
| 62 | +// k=30 => means for f > 1 (e >= 0) => (k-e)=0..30, otherwise (e<0) => (k-e)=31..60 |
| 63 | +// |
| 64 | +// if e < 0, "num" can and "den" will exceed the int32 limit and needs to be adjusted as follows: |
| 65 | +// * let den = 2^30 |
| 66 | +// * adjust "num" according the missing accuracy of "den" now, which means dividing by 2^(-exp) |
| 67 | +// |
| 68 | +// example 18.4: exp=4, M=1258291 |
| 69 | +// k=34 => k=30 |
| 70 | +// den = 2^(30-4) = 67108864 = 0x04000000 |
| 71 | +// num = 2^30 + M*2^(30-23) = 2^30 + M*2^7 = 1073741824 + 161061248 = 1234803072 = 0x49999980 |
| 72 | +// f = num/den = 18.3999996185; num and den are guaranteed to be in the int32 range |
| 73 | +// |
| 74 | +// example 0.05: exp=-5, M=5033164 |
| 75 | +// k=25 |
| 76 | +// den64 = 2^(30--5) = 2^35 = 34359738368 = 0x0800000000 |
| 77 | +// num64 = 2^30 + M*2^(30-23) = 2^30 + M*2^7 = 1073741824 + 644244992 = 1717986816 = 0x66666600 |
| 78 | +// f64 = num64/den64 = 0.04999999702 |
| 79 | +// den = 2^30 = 1073741824 |
| 80 | +// num = num64/2^--5 = num64/2^5 = 1717986816/32 = 53687088 = 0x03333330 |
| 81 | +// f = num/den = 0.04999999702 |
| 82 | +// |
| 83 | +// special cases: |
| 84 | +// E=0, M=0: f=0 => num=0; den=1 |
| 85 | +// E=0, M>0 (very small numbers): |f| =< 2^-127; f=1/MaxInt32 (error=e^-95); f=0 (error=2^-127) => num=0; den=1 |
| 86 | +// please note: with this we loose the sign, but get higher accuracy |
| 87 | +// E=255 (all bits set), M=0: +/-Inf; but for int32 numerator, +Inf is for e=31, means E=158, -Inf is for e= |
| 88 | +// E=255 (all bits set), M>0: NaN |
| 89 | +// |
| 90 | +// See: |
| 91 | +// https://de.wikipedia.org/wiki/IEEE_754 |
| 92 | +// |
| 93 | +// Very good accuracy can be reached, similar to calculating with "math/big.Rat", but ~20 times faster: |
| 94 | +// nrf52840: 1.526µs-4.577µs |
| 95 | +// |
| 96 | +// Considered other options: see function in test file |
| 97 | +func Float32Fractions(f float32) (int32, int32, error) { |
| 98 | + const ( |
| 99 | + k = maxAbsExponent |
| 100 | + twoPowK = (1 << k) // 2^30 |
| 101 | + kMinusMantisssaBits = k - mantissaBits // 7 |
| 102 | + ) |
| 103 | + |
| 104 | + bits := math.Float32bits(f) // uint32 |
| 105 | + |
| 106 | + encMantNumerator := bits & mantissaBitsMask // encrypted mantissa "M" |
| 107 | + encExpDenominator := int64(bits>>mantissaBits) & expBitmask // encrypted exponent "E" |
| 108 | + negative := bits&signBitmask > 0 // use the sign bit as boolean |
| 109 | + /* |
| 110 | + fmt.Printf("%f > bits: %032b (%x) exp: %08b (%d) man: %023b (%d) neg: %t\n", f, bits, bits, encExpDenominator, |
| 111 | + encExpDenominator, encMantNumerator, encMantNumerator, negative) |
| 112 | + */ |
| 113 | + |
| 114 | + if encMantNumerator == 0 && encExpDenominator == 0 { |
| 115 | + // f was zero |
| 116 | + return 0, 1, nil |
| 117 | + } |
| 118 | + |
| 119 | + if encExpDenominator > maxAbsExponentEncoded { |
| 120 | + if negative { |
| 121 | + if encExpDenominator == maxAbsExponentEncoded+1 { |
| 122 | + // special case for "s"=-1; "m" exactly "1" and "e" exactly "31" |
| 123 | + return math.MinInt32, 1, nil |
| 124 | + } |
| 125 | + |
| 126 | + return math.MinInt32, 1, errors.New("input value exceeds -int32 range") |
| 127 | + } |
| 128 | + |
| 129 | + return math.MaxInt32, 1, errors.New("input value exceeds +int32 range") |
| 130 | + } |
| 131 | + |
| 132 | + if encExpDenominator < -maxAbsExponentEncoded { |
| 133 | + // treat this is zero, not 1/MaxInt32 (see comment) |
| 134 | + return 0, 1, nil |
| 135 | + } |
| 136 | + |
| 137 | + // num = (-1)^S * [2^k + M*2^(k-23)] |
| 138 | + encMantNumerator = twoPowK + encMantNumerator<<kMinusMantisssaBits // now decrypted mantissa without sign |
| 139 | + |
| 140 | + // start converting to 32 bit integer |
| 141 | + if encExpDenominator < bias { |
| 142 | + // small values f < 1, denominator is set to fixed 2^30 and numerator needs to be decreased |
| 143 | + encMantNumerator = encMantNumerator >> (bias - encExpDenominator) |
| 144 | + if negative { |
| 145 | + return -int32(encMantNumerator), twoPowK, nil |
| 146 | + } |
| 147 | + |
| 148 | + return int32(encMantNumerator), twoPowK, nil |
| 149 | + } |
| 150 | + |
| 151 | + // values f > 1, numerator and denominator can be used like expected |
| 152 | + // den = 2^(k-e) |
| 153 | + encExpDenominator = 1 << (k + bias - encExpDenominator) // now denominator |
| 154 | + |
| 155 | + if negative { |
| 156 | + return -int32(encMantNumerator), int32(encExpDenominator), nil |
| 157 | + } |
| 158 | + |
| 159 | + return int32(encMantNumerator), int32(encExpDenominator), nil |
| 160 | +} |
0 commit comments