-
Notifications
You must be signed in to change notification settings - Fork 625
/
rounding.go
160 lines (145 loc) · 5.07 KB
/
rounding.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Multiprecision decimal numbers.
// For floating-point formatting only; not general purpose.
// Only operations are assign and (binary) left/right shift.
// Can do binary floating point in multiprecision decimal precisely
// because 2 divides 10; cannot do decimal floating point
// in multiprecision binary precisely.
package decimal
type floatInfo struct {
mantbits uint
expbits uint
bias int
}
var float32info = floatInfo{23, 8, -127}
var float64info = floatInfo{52, 11, -1023}
// roundShortest rounds d (= mant * 2^exp) to the shortest number of digits
// that will let the original floating point value be precisely reconstructed.
func roundShortest(d *decimal, mant uint64, exp int, flt *floatInfo) {
// If mantissa is zero, the number is zero; stop now.
if mant == 0 {
d.nd = 0
return
}
// Compute upper and lower such that any decimal number
// between upper and lower (possibly inclusive)
// will round to the original floating point number.
// We may see at once that the number is already shortest.
//
// Suppose d is not denormal, so that 2^exp <= d < 10^dp.
// The closest shorter number is at least 10^(dp-nd) away.
// The lower/upper bounds computed below are at distance
// at most 2^(exp-mantbits).
//
// So the number is already shortest if 10^(dp-nd) > 2^(exp-mantbits),
// or equivalently log2(10)*(dp-nd) > exp-mantbits.
// It is true if 332/100*(dp-nd) >= exp-mantbits (log2(10) > 3.32).
minexp := flt.bias + 1 // minimum possible exponent
if exp > minexp && 332*(d.dp-d.nd) >= 100*(exp-int(flt.mantbits)) {
// The number is already shortest.
return
}
// d = mant << (exp - mantbits)
// Next highest floating point number is mant+1 << exp-mantbits.
// Our upper bound is halfway between, mant*2+1 << exp-mantbits-1.
upper := new(decimal)
upper.Assign(mant*2 + 1)
upper.Shift(exp - int(flt.mantbits) - 1)
// d = mant << (exp - mantbits)
// Next lowest floating point number is mant-1 << exp-mantbits,
// unless mant-1 drops the significant bit and exp is not the minimum exp,
// in which case the next lowest is mant*2-1 << exp-mantbits-1.
// Either way, call it mantlo << explo-mantbits.
// Our lower bound is halfway between, mantlo*2+1 << explo-mantbits-1.
var mantlo uint64
var explo int
if mant > 1<<flt.mantbits || exp == minexp {
mantlo = mant - 1
explo = exp
} else {
mantlo = mant*2 - 1
explo = exp - 1
}
lower := new(decimal)
lower.Assign(mantlo*2 + 1)
lower.Shift(explo - int(flt.mantbits) - 1)
// The upper and lower bounds are possible outputs only if
// the original mantissa is even, so that IEEE round-to-even
// would round to the original mantissa and not the neighbors.
inclusive := mant%2 == 0
// As we walk the digits we want to know whether rounding up would fall
// within the upper bound. This is tracked by upperdelta:
//
// If upperdelta == 0, the digits of d and upper are the same so far.
//
// If upperdelta == 1, we saw a difference of 1 between d and upper on a
// previous digit and subsequently only 9s for d and 0s for upper.
// (Thus rounding up may fall outside the bound, if it is exclusive.)
//
// If upperdelta == 2, then the difference is greater than 1
// and we know that rounding up falls within the bound.
var upperdelta uint8
// Now we can figure out the minimum number of digits required.
// Walk along until d has distinguished itself from upper and lower.
for ui := 0; ; ui++ {
// lower, d, and upper may have the decimal points at different
// places. In this case upper is the longest, so we iterate from
// ui==0 and start li and mi at (possibly) -1.
mi := ui - upper.dp + d.dp
if mi >= d.nd {
break
}
li := ui - upper.dp + lower.dp
l := byte('0') // lower digit
if li >= 0 && li < lower.nd {
l = lower.d[li]
}
m := byte('0') // middle digit
if mi >= 0 {
m = d.d[mi]
}
u := byte('0') // upper digit
if ui < upper.nd {
u = upper.d[ui]
}
// Okay to round down (truncate) if lower has a different digit
// or if lower is inclusive and is exactly the result of rounding
// down (i.e., and we have reached the final digit of lower).
okdown := l != m || inclusive && li+1 == lower.nd
switch {
case upperdelta == 0 && m+1 < u:
// Example:
// m = 12345xxx
// u = 12347xxx
upperdelta = 2
case upperdelta == 0 && m != u:
// Example:
// m = 12345xxx
// u = 12346xxx
upperdelta = 1
case upperdelta == 1 && (m != '9' || u != '0'):
// Example:
// m = 1234598x
// u = 1234600x
upperdelta = 2
}
// Okay to round up if upper has a different digit and either upper
// is inclusive or upper is bigger than the result of rounding up.
okup := upperdelta > 0 && (inclusive || upperdelta > 1 || ui+1 < upper.nd)
// If it's okay to do either, then round to the nearest one.
// If it's okay to do only one, do it.
switch {
case okdown && okup:
d.Round(mi + 1)
return
case okdown:
d.RoundDown(mi + 1)
return
case okup:
d.RoundUp(mi + 1)
return
}
}
}