1 files changed, 163 insertions, 0 deletions
diff --git a/third_party/rust/wpf-gpu-raster/src/real.rs b/third_party/rust/wpf-gpu-raster/src/real.rs
new file mode 100644
index 0000000000..a9144ec149
--- /dev/null
+++ b/third_party/rust/wpf-gpu-raster/src/real.rs
@@ -0,0 +1,163 @@
+pub mod CFloatFPU {
+    // Maximum allowed argument for SmallRound
+    // const sc_uSmallMax: u32 = 0xFFFFF;
+
+    // Binary representation of static_cast<float>(sc_uSmallMax)
+    const sc_uBinaryFloatSmallMax: u32 = 0x497ffff0;
+
+    fn LargeRound(x: f32) -> i32 {
+        //XXX: the SSE2 version is probably slower than a naive SSE4 implementation that can use roundss
+        #[cfg(target_feature = "sse2")]
+        unsafe {
+            #[cfg(target_arch = "x86")]
+            use std::arch::x86::{__m128, _mm_set_ss, _mm_cvtss_si32, _mm_cvtsi32_ss, _mm_sub_ss, _mm_cmple_ss, _mm_store_ss, _mm_setzero_ps};
+            #[cfg(target_arch = "x86_64")]
+            use std::arch::x86_64::{__m128, _mm_set_ss, _mm_cvtss_si32, _mm_cvtsi32_ss, _mm_sub_ss, _mm_cmple_ss, _mm_store_ss, _mm_setzero_ps};
+
+            let given: __m128 = _mm_set_ss(x);                       // load given value
+            let result = _mm_cvtss_si32(given);
+            let rounded: __m128 = _mm_setzero_ps();             // convert it to integer (rounding mode doesn't matter)
+            let rounded = _mm_cvtsi32_ss(rounded, result);   // convert back to float
+            let diff = _mm_sub_ss(rounded, given);           // diff = (rounded - given)
+            let negHalf = _mm_set_ss(-0.5);                 // load -0.5f
+            let mask = _mm_cmple_ss(diff, negHalf);          // get all-ones if (rounded - given) < -0.5f
+            let mut correction: i32 = 0;                                 
+            _mm_store_ss((&mut correction) as *mut _ as *mut _, mask); // get comparison result as integer
+            return result - correction;                         // correct the result of rounding
+        }
+        #[cfg(not(target_feature = "sse2"))]
+        return (x + 0.5).floor() as i32;
+    }
+
+
+    //+------------------------------------------------------------------------
+//
+//  Function:   CFloatFPU::SmallRound
+//
+//  Synopsis:   Convert given floating point value to nearest integer.
+//              Half-integers are rounded up.
+//
+//              Important: this routine is fast but restricted:
+//              given x should be within (-(0x100000-.5) < x < (0x100000-.5))
+//
+//  Details:    Implementation has abnormal looking that use to confuse
+//              many people. However, it indeed works, being tested
+//              thoroughly on x86 and ia64 platforms for literally
+//              each possible argument values in the given range.
+//              
+//  More details:
+//      Implementation is based on the knowledge of floating point
+//      value representation. This 32-bits value consists of three parts:
+//      v & 0x80000000 = sign
+//      v & 0x7F800000 = exponent
+//      v & 0x007FFFFF - mantissa
+//
+//      Let N to be a floating point number within -0x400000 <= N <= 0x3FFFFF.
+//      The sum (S = 0xC00000 + N) thus will satisfy Ox800000 <= S <= 0xFFFFFF.
+//      All the numbers within this range (sometimes referred to as "binade")
+//      have same position of most significant bit, i.e. 0x800000.
+//      Therefore they are normalized equal way, thus
+//      providing the weights on mantissa's bits to be the same
+//      as integer numbers have. In other words, to get
+//      integer value of floating point S, when Ox800000 <= S <= 0xFFFFFF,
+//      we can just throw away the exponent and sign, and add assumed
+//      most significant bit (that is always 1 and therefore is not stored
+//      in floating point value):
+//      (int)S = (<float S as int> & 0x7FFFFF | 0x800000);
+//      To get given N in as integer, we need to subtract back
+//      the value 0xC00000 that was added in order to obtain
+//      proper normalization:
+//      N = (<float S as int> & 0x7FFFFF | 0x800000) - 0xC00000.
+//      or
+//      N = (<float S as int> & 0x7FFFFF           ) - 0x400000.
+//
+//      Hopefully, the text above explains how
+//      following routine works:
+//        int SmallRound1(float x)
+//        {
+//            union
+//            {
+//                __int32 i;
+//                float f;
+//            } u;
+//
+//            u.f = x + float(0x00C00000);
+//            return ((u.i - (int)0x00400000) << 9) >> 9;
+//        }
+//      Unfortunatelly it is imperfect, due to the way how FPU
+//      use to round intermediate calculation results.
+//      By default, rounding mode is set to "nearest".
+//      This means that when it calculates N+float(0x00C00000),
+//      the 80-bit precise result will not fit in 32-bit float,
+//      so some least significant bits will be thrown away.
+//      Rounding to nearest means that S consisting of intS + fraction,
+//      where 0 <= fraction < 1, will be converted to intS
+//      when fraction < 0.5 and to intS+1 if fraction > 0.5.
+//      What would happen with fraction exactly equal to 0.5?
+//      Smart thing: S will go to intS if intS is even and
+//      to intS+1 if intS is odd. In other words, half-integers
+//      are rounded to nearest even number.
+//      This FPU feature apparently is useful to minimize
+//      average rounding error when somebody is, say,
+//      digitally simulating electrons' behavior in plasma.
+//      However for graphics this is not desired.
+//
+//      We want to move half-integers up, therefore
+//      define SmallRound(x) as {return SmallRound1(x*2+.5) >> 1;}.
+//      This may require more comments.
+//      Let given x = i+f, where i is integer and f is fraction, 0 <= f < 1.
+//      Let's wee what is y = x*2+.5:
+//          y = i*2 + (f*2 + .5) = i*2 + g, where g = f*2 + .5;
+//      If "f" is in the range 0 <= f < .5 (so correct rounding result should be "i"),
+//      then range for "g" is .5 <= g < 1.5. The very first value, .5 will force
+//      SmallRound1 result to be "i*2", due to round-to-even rule; the remaining
+//      will lead to "i*2+1". Consequent shift will throw away extra "1" and give
+//      us desired "i".
+//      When "f" in in the range .5 <= f < 1, then 1.5 <= g < 2.5.
+//      All these values will round to 2, so SmallRound1 will return (2*i+2),
+//      and the final shift will give desired 1+1.
+//
+//      To get final routine looking we need to transform the combines
+//      expression for u.f:
+//            (x*2) + .5 + float(0x00C00000) ==
+//             (x + (.25 + double(0x00600000)) )*2
+//      Note that the ratio "2" means nothing for following operations,
+//      since it affects only exponent bits that are ignored anyway.
+//      So we can save some processor cycles avoiding this multiplication.
+//
+//      And, the very final beautification:
+//      to avoid subtracting 0x00400000 let's ignore this bit.
+//      This mean that we effectively decrease available range by 1 bit,
+//      but we're chasing for performance and found it acceptable.
+//      So 
+//         return ((u.i - (int)0x00400000) << 9) >> 9;
+//      is converted to
+//         return ((u.i                  ) << 10) >> 10;
+//      Eventually, will found that final shift by 10 bits may be combined
+//      with shift by 1 in the definition {return SmallRound1(x*2+.5) >> 1;},
+//      we'll just shift by 11 bits. That's it.
+//      
+//-------------------------------------------------------------------------
+fn SmallRound(x: f32) -> i32
+{
+    //AssertPrecisionAndRoundingMode();
+    debug_assert!(-(0x100000 as f64 -0.5) < x as f64 && (x as f64) < (0x100000 as f64 -0.5));
+
+ 
+    let fi = (x as f64 + (0x00600000 as f64 + 0.25)) as f32;
+    let result = ((fi.to_bits() as i32) << 10) >> 11;
+
+    debug_assert!(x < (result as f32) + 0.5 && x >= (result as f32) - 0.5);
+    return result;
+}
+
+pub fn Round(x: f32) -> i32
+{
+    // cut off sign
+    let xAbs: u32 = x.to_bits() & 0x7FFFFFFF;
+
+    return if xAbs <= sc_uBinaryFloatSmallMax {SmallRound(x)} else {LargeRound(x)};
+}
+}
+
+macro_rules! TOREAL { ($e: expr) => { $e as REAL } }