1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
|
pub mod CFloatFPU {
// Maximum allowed argument for SmallRound
// const sc_uSmallMax: u32 = 0xFFFFF;
// Binary representation of static_cast<float>(sc_uSmallMax)
const sc_uBinaryFloatSmallMax: u32 = 0x497ffff0;
fn LargeRound(x: f32) -> i32 {
//XXX: the SSE2 version is probably slower than a naive SSE4 implementation that can use roundss
#[cfg(target_feature = "sse2")]
unsafe {
#[cfg(target_arch = "x86")]
use std::arch::x86::{__m128, _mm_set_ss, _mm_cvtss_si32, _mm_cvtsi32_ss, _mm_sub_ss, _mm_cmple_ss, _mm_store_ss, _mm_setzero_ps};
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::{__m128, _mm_set_ss, _mm_cvtss_si32, _mm_cvtsi32_ss, _mm_sub_ss, _mm_cmple_ss, _mm_store_ss, _mm_setzero_ps};
let given: __m128 = _mm_set_ss(x); // load given value
let result = _mm_cvtss_si32(given);
let rounded: __m128 = _mm_setzero_ps(); // convert it to integer (rounding mode doesn't matter)
let rounded = _mm_cvtsi32_ss(rounded, result); // convert back to float
let diff = _mm_sub_ss(rounded, given); // diff = (rounded - given)
let negHalf = _mm_set_ss(-0.5); // load -0.5f
let mask = _mm_cmple_ss(diff, negHalf); // get all-ones if (rounded - given) < -0.5f
let mut correction: i32 = 0;
_mm_store_ss((&mut correction) as *mut _ as *mut _, mask); // get comparison result as integer
return result - correction; // correct the result of rounding
}
#[cfg(not(target_feature = "sse2"))]
return (x + 0.5).floor() as i32;
}
//+------------------------------------------------------------------------
//
// Function: CFloatFPU::SmallRound
//
// Synopsis: Convert given floating point value to nearest integer.
// Half-integers are rounded up.
//
// Important: this routine is fast but restricted:
// given x should be within (-(0x100000-.5) < x < (0x100000-.5))
//
// Details: Implementation has abnormal looking that use to confuse
// many people. However, it indeed works, being tested
// thoroughly on x86 and ia64 platforms for literally
// each possible argument values in the given range.
//
// More details:
// Implementation is based on the knowledge of floating point
// value representation. This 32-bits value consists of three parts:
// v & 0x80000000 = sign
// v & 0x7F800000 = exponent
// v & 0x007FFFFF - mantissa
//
// Let N to be a floating point number within -0x400000 <= N <= 0x3FFFFF.
// The sum (S = 0xC00000 + N) thus will satisfy Ox800000 <= S <= 0xFFFFFF.
// All the numbers within this range (sometimes referred to as "binade")
// have same position of most significant bit, i.e. 0x800000.
// Therefore they are normalized equal way, thus
// providing the weights on mantissa's bits to be the same
// as integer numbers have. In other words, to get
// integer value of floating point S, when Ox800000 <= S <= 0xFFFFFF,
// we can just throw away the exponent and sign, and add assumed
// most significant bit (that is always 1 and therefore is not stored
// in floating point value):
// (int)S = (<float S as int> & 0x7FFFFF | 0x800000);
// To get given N in as integer, we need to subtract back
// the value 0xC00000 that was added in order to obtain
// proper normalization:
// N = (<float S as int> & 0x7FFFFF | 0x800000) - 0xC00000.
// or
// N = (<float S as int> & 0x7FFFFF ) - 0x400000.
//
// Hopefully, the text above explains how
// following routine works:
// int SmallRound1(float x)
// {
// union
// {
// __int32 i;
// float f;
// } u;
//
// u.f = x + float(0x00C00000);
// return ((u.i - (int)0x00400000) << 9) >> 9;
// }
// Unfortunatelly it is imperfect, due to the way how FPU
// use to round intermediate calculation results.
// By default, rounding mode is set to "nearest".
// This means that when it calculates N+float(0x00C00000),
// the 80-bit precise result will not fit in 32-bit float,
// so some least significant bits will be thrown away.
// Rounding to nearest means that S consisting of intS + fraction,
// where 0 <= fraction < 1, will be converted to intS
// when fraction < 0.5 and to intS+1 if fraction > 0.5.
// What would happen with fraction exactly equal to 0.5?
// Smart thing: S will go to intS if intS is even and
// to intS+1 if intS is odd. In other words, half-integers
// are rounded to nearest even number.
// This FPU feature apparently is useful to minimize
// average rounding error when somebody is, say,
// digitally simulating electrons' behavior in plasma.
// However for graphics this is not desired.
//
// We want to move half-integers up, therefore
// define SmallRound(x) as {return SmallRound1(x*2+.5) >> 1;}.
// This may require more comments.
// Let given x = i+f, where i is integer and f is fraction, 0 <= f < 1.
// Let's wee what is y = x*2+.5:
// y = i*2 + (f*2 + .5) = i*2 + g, where g = f*2 + .5;
// If "f" is in the range 0 <= f < .5 (so correct rounding result should be "i"),
// then range for "g" is .5 <= g < 1.5. The very first value, .5 will force
// SmallRound1 result to be "i*2", due to round-to-even rule; the remaining
// will lead to "i*2+1". Consequent shift will throw away extra "1" and give
// us desired "i".
// When "f" in in the range .5 <= f < 1, then 1.5 <= g < 2.5.
// All these values will round to 2, so SmallRound1 will return (2*i+2),
// and the final shift will give desired 1+1.
//
// To get final routine looking we need to transform the combines
// expression for u.f:
// (x*2) + .5 + float(0x00C00000) ==
// (x + (.25 + double(0x00600000)) )*2
// Note that the ratio "2" means nothing for following operations,
// since it affects only exponent bits that are ignored anyway.
// So we can save some processor cycles avoiding this multiplication.
//
// And, the very final beautification:
// to avoid subtracting 0x00400000 let's ignore this bit.
// This mean that we effectively decrease available range by 1 bit,
// but we're chasing for performance and found it acceptable.
// So
// return ((u.i - (int)0x00400000) << 9) >> 9;
// is converted to
// return ((u.i ) << 10) >> 10;
// Eventually, will found that final shift by 10 bits may be combined
// with shift by 1 in the definition {return SmallRound1(x*2+.5) >> 1;},
// we'll just shift by 11 bits. That's it.
//
//-------------------------------------------------------------------------
fn SmallRound(x: f32) -> i32
{
//AssertPrecisionAndRoundingMode();
debug_assert!(-(0x100000 as f64 -0.5) < x as f64 && (x as f64) < (0x100000 as f64 -0.5));
let fi = (x as f64 + (0x00600000 as f64 + 0.25)) as f32;
let result = ((fi.to_bits() as i32) << 10) >> 11;
debug_assert!(x < (result as f32) + 0.5 && x >= (result as f32) - 0.5);
return result;
}
pub fn Round(x: f32) -> i32
{
// cut off sign
let xAbs: u32 = x.to_bits() & 0x7FFFFFFF;
return if xAbs <= sc_uBinaryFloatSmallMax {SmallRound(x)} else {LargeRound(x)};
}
}
macro_rules! TOREAL { ($e: expr) => { $e as REAL } }
|