/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "Swizzle.h"
#include "Logging.h"
#include "Orientation.h"
#include "Tools.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/EndianUtils.h"
#include "mozilla/UniquePtr.h"

#ifdef USE_SSE2
#  include "mozilla/SSE.h"
#endif

#ifdef USE_NEON
#  include "mozilla/arm.h"
#endif

#include <new>

namespace mozilla {
namespace gfx {

/**
 * Convenience macros for dispatching to various format combinations.
 */

// Hash the formats to a relatively dense value to optimize jump table
// generation. The first 6 formats in SurfaceFormat are the 32-bit BGRA
// variants and are the most common formats dispatched here. Room is reserved
// in the lowish bits for up to these 6 destination formats. If a destination
// format is >= 6, the 6th bit is set to avoid collisions.
#define FORMAT_KEY(aSrcFormat, aDstFormat) \
  (int(aSrcFormat) * 6 + int(aDstFormat) + (int(int(aDstFormat) >= 6) << 6))

#define FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, ...) \
  case FORMAT_KEY(aSrcFormat, aDstFormat):            \
    __VA_ARGS__;                                      \
    return true;

#define FORMAT_CASE(aSrcFormat, aDstFormat, ...) \
  FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, FORMAT_CASE_CALL(__VA_ARGS__))

#define FORMAT_CASE_ROW(aSrcFormat, aDstFormat, ...) \
  case FORMAT_KEY(aSrcFormat, aDstFormat):           \
    return &__VA_ARGS__;

/**
 * Constexpr functions for analyzing format attributes in templates.
 */

// Whether B comes before R in the pixel memory layout.
static constexpr bool IsBGRFormat(SurfaceFormat aFormat) {
  return aFormat == SurfaceFormat::B8G8R8A8 ||
#if MOZ_LITTLE_ENDIAN()
         aFormat == SurfaceFormat::R5G6B5_UINT16 ||
#endif
         aFormat == SurfaceFormat::B8G8R8X8 || aFormat == SurfaceFormat::B8G8R8;
}

// Whether the order of B and R needs to be swapped to map from src to dst.
static constexpr bool ShouldSwapRB(SurfaceFormat aSrcFormat,
                                   SurfaceFormat aDstFormat) {
  return IsBGRFormat(aSrcFormat) != IsBGRFormat(aDstFormat);
}

// The starting byte of the RGB components in pixel memory.
static constexpr uint32_t RGBByteIndex(SurfaceFormat aFormat) {
  return aFormat == SurfaceFormat::A8R8G8B8 ||
                 aFormat == SurfaceFormat::X8R8G8B8
             ? 1
             : 0;
}

// The byte of the alpha component, which just comes after RGB.
static constexpr uint32_t AlphaByteIndex(SurfaceFormat aFormat) {
  return (RGBByteIndex(aFormat) + 3) % 4;
}

// The endian-dependent bit shift to access RGB of a UINT32 pixel.
static constexpr uint32_t RGBBitShift(SurfaceFormat aFormat) {
#if MOZ_LITTLE_ENDIAN()
  return 8 * RGBByteIndex(aFormat);
#else
  return 8 - 8 * RGBByteIndex(aFormat);
#endif
}

// The endian-dependent bit shift to access alpha of a UINT32 pixel.
static constexpr uint32_t AlphaBitShift(SurfaceFormat aFormat) {
  return (RGBBitShift(aFormat) + 24) % 32;
}

// Whether the pixel format should ignore the value of the alpha channel and
// treat it as opaque.
static constexpr bool IgnoreAlpha(SurfaceFormat aFormat) {
  return aFormat == SurfaceFormat::B8G8R8X8 ||
         aFormat == SurfaceFormat::R8G8B8X8 ||
         aFormat == SurfaceFormat::X8R8G8B8;
}
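// Illustrative only: assuming the enum ordering described above (the six
// 32-bit formats B8G8R8A8, B8G8R8X8, R8G8B8A8, R8G8B8X8, A8R8G8B8, X8R8G8B8
// occupy values 0..5, with R8G8B8 at 6), the hash stays dense for the common
// pairs and disambiguates the rest with bit 6:
//   FORMAT_KEY(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
//     == 0 * 6 + 2 + (0 << 6) == 2
//   FORMAT_KEY(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8A8)
//     == 6 * 6 + 0 + (0 << 6) == 36
//   FORMAT_KEY(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8)
//     == 0 * 6 + 6 + (1 << 6) == 70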
// Whether to force alpha to opaque to map from src to dst.
static constexpr bool ShouldForceOpaque(SurfaceFormat aSrcFormat,
                                        SurfaceFormat aDstFormat) {
  return IgnoreAlpha(aSrcFormat) != IgnoreAlpha(aDstFormat);
}

#ifdef USE_SSE2
/**
 * SSE2 optimizations
 */

template <bool aSwapRB, bool aOpaqueAlpha>
void Premultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define PREMULTIPLY_SSE2(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                                \
                Premultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                                 ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void PremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);

#  define PREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                              \
        aSrcFormat, aDstFormat,                                   \
        PremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                            ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void Unpremultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define UNPREMULTIPLY_SSE2(aSrcFormat, aDstFormat) \
    FORMAT_CASE(aSrcFormat, aDstFormat,              \
                Unpremultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);

#  define UNPREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat) \
    FORMAT_CASE_ROW(                                     \
        aSrcFormat, aDstFormat,                          \
        UnpremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void Swizzle_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define SWIZZLE_SSE2(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                            \
                Swizzle_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                             ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void SwizzleRow_SSE2(const uint8_t*, uint8_t*, int32_t);

#  define SWIZZLE_ROW_SSE2(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                          \
        aSrcFormat, aDstFormat,                               \
        SwizzleRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                        ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpackRowRGB24_SSSE3(const uint8_t*, uint8_t*, int32_t);

#  define UNPACK_ROW_RGB_SSSE3(aDstFormat)                       \
    FORMAT_CASE_ROW(                                             \
        SurfaceFormat::R8G8B8, aDstFormat,                       \
        UnpackRowRGB24_SSSE3<ShouldSwapRB(SurfaceFormat::R8G8B8, \
                                          aDstFormat)>)

template <bool aSwapRB>
void UnpackRowRGB24_AVX2(const uint8_t*, uint8_t*, int32_t);

#  define UNPACK_ROW_RGB_AVX2(aDstFormat)                       \
    FORMAT_CASE_ROW(                                            \
        SurfaceFormat::R8G8B8, aDstFormat,                      \
        UnpackRowRGB24_AVX2<ShouldSwapRB(SurfaceFormat::R8G8B8, \
                                         aDstFormat)>)

#endif

#ifdef USE_NEON
/**
 * ARM NEON optimizations
 */

template <bool aSwapRB, bool aOpaqueAlpha>
void Premultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define PREMULTIPLY_NEON(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                                \
                Premultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                                 ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void PremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);

#  define PREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                              \
        aSrcFormat, aDstFormat,                                   \
        PremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                            ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void Unpremultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define UNPREMULTIPLY_NEON(aSrcFormat, aDstFormat) \
    FORMAT_CASE(aSrcFormat, aDstFormat,              \
                Unpremultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);

#  define UNPREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat) \
    FORMAT_CASE_ROW(                                     \
        aSrcFormat, aDstFormat,                          \
        UnpremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void Swizzle_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define SWIZZLE_NEON(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                            \
                Swizzle_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                             ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void SwizzleRow_NEON(const uint8_t*, uint8_t*, int32_t);

#  define SWIZZLE_ROW_NEON(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                          \
        aSrcFormat, aDstFormat,                               \
        SwizzleRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                        ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpackRowRGB24_NEON(const uint8_t*, uint8_t*, int32_t);

#  define UNPACK_ROW_RGB_NEON(aDstFormat)                       \
    FORMAT_CASE_ROW(                                            \
        SurfaceFormat::R8G8B8, aDstFormat,                      \
        UnpackRowRGB24_NEON<ShouldSwapRB(SurfaceFormat::R8G8B8, \
                                         aDstFormat)>)

#endif
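// Illustrative only: with FORMAT_CASE_CALL defined as it is inside
// PremultiplyData below, a dispatch entry such as
//   PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
// expands to roughly
//   case FORMAT_KEY(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8):
//     Premultiply_SSE2<true, true>(aSrc, srcGap, aDst, dstGap, size);
//     return true;
// since mapping BGRA to RGBX both swaps R/B and forces opaque alpha.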
/**
 * Premultiplying
 */

// Fallback premultiply implementation that uses splayed pixel math to reduce
// the multiplications used. That is, the R and B components are isolated from
// the G and A components, which then can be multiplied as if they were two
// 2-component vectors. Otherwise, an approximation of divide-by-255 is used,
// which is faster than an actual division. These optimizations are also used
// for the SSE2 and NEON implementations.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void PremultiplyChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                     int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    // Load and process 1 entire pixel at a time.
    uint32_t color = *reinterpret_cast<const uint32_t*>(aSrc);

    uint32_t a = aSrcAShift ? color >> aSrcAShift : color & 0xFF;

    // Isolate the R and B components.
    uint32_t rb = (color >> aSrcRGBShift) & 0x00FF00FF;
    // Swap the order of R and B if necessary.
    if (aSwapRB) {
      rb = (rb >> 16) | (rb << 16);
    }
    // Approximate the multiply by alpha and divide by 255 which is
    // essentially:
    // c = c*a + 255; c = (c + (c >> 8)) >> 8;
    // However, we omit the final >> 8 to fold it with the final shift into
    // place depending on desired output format.
    rb = rb * a + 0x00FF00FF;
    rb = (rb + ((rb >> 8) & 0x00FF00FF)) & 0xFF00FF00;

    // Use the same approximation as above, but G is shifted 8 bits left.
    // Alpha is left out and handled separately.
    uint32_t g = color & (0xFF00 << aSrcRGBShift);
    g = g * a + (0xFF00 << aSrcRGBShift);
    g = (g + (g >> 8)) & (0xFF0000 << aSrcRGBShift);

    // The above math leaves RGB shifted left by 8 bits. Shift it right if
    // required for the output format, then combine the components back
    // together to produce the output pixel. Add the alpha back on if the
    // output format is not opaque.
    *reinterpret_cast<uint32_t*>(aDst) =
        (rb >> (8 - aDstRGBShift)) | (g >> (8 + aSrcRGBShift - aDstRGBShift)) |
        (aOpaqueAlpha ? 0xFF << aDstAShift : a << aDstAShift);

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}

template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void PremultiplyRowFallback(const uint8_t* aSrc, uint8_t* aDst,
                                   int32_t aLength) {
  PremultiplyChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
                           aDstRGBShift, aDstAShift>(aSrc, aDst, aLength);
}

template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void PremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
                                uint8_t* aDst, int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    PremultiplyChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
                             aDstRGBShift, aDstAShift>(aSrc, aDst, aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define PREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat)                     \
  FORMAT_CASE(                                                                \
      aSrcFormat, aDstFormat,                                                 \
      PremultiplyFallback<ShouldSwapRB(aSrcFormat, aDstFormat),               \
                          ShouldForceOpaque(aSrcFormat, aDstFormat),          \
                          RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                          RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

#define PREMULTIPLY_FALLBACK(aSrcFormat)                         \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)

#define PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat)             \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat,                                 \
                  PremultiplyRowFallback<                                 \
                      ShouldSwapRB(aSrcFormat, aDstFormat),               \
                      ShouldForceOpaque(aSrcFormat, aDstFormat),          \
                      RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                      RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

#define PREMULTIPLY_ROW_FALLBACK(aSrcFormat)                         \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8) \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8) \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8) \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)
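// Illustrative only: the scalar form of the approximation above, for one
// component c = 0xFF at alpha a = 0x80 (straight-alpha 50% opaque white):
//   t = c*a + 255            = 255*128 + 255       = 32895
//   c' = (t + (t >> 8)) >> 8 = (32895 + 128) >> 8  = 128
// which matches the exact result 255*128/255 = 128. The +255 bias makes the
// result round up (close to ceil(c*a/255)), so it can be one more than the
// truncated quotient, e.g. c = a = 0x80 yields 65 rather than 64.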
// If rows are tightly packed, and the size of the total area will fit within
// the precision range of a single row, then process all the data as if it was
// a single row.
static inline IntSize CollapseSize(const IntSize& aSize, int32_t aSrcStride,
                                   int32_t aDstStride) {
  if (aSrcStride == aDstStride && (aSrcStride & 3) == 0 &&
      aSrcStride / 4 == aSize.width) {
    CheckedInt32 area = CheckedInt32(aSize.width) * CheckedInt32(aSize.height);
    if (area.isValid()) {
      return IntSize(area.value(), 1);
    }
  }
  return aSize;
}

static inline int32_t GetStrideGap(int32_t aWidth, SurfaceFormat aFormat,
                                   int32_t aStride) {
  CheckedInt32 used = CheckedInt32(aWidth) * BytesPerPixel(aFormat);
  if (!used.isValid() || used.value() < 0) {
    return -1;
  }
  return aStride - used.value();
}

bool PremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
                     SurfaceFormat aSrcFormat, uint8_t* aDst,
                     int32_t aDstStride, SurfaceFormat aDstFormat,
                     const IntSize& aSize) {
  if (aSize.IsEmpty()) {
    return true;
  }
  IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
  // Find gap from end of row to the start of the next row.
  int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
  int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
  if (srcGap < 0 || dstGap < 0) {
    return false;
  }

#define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)

#ifdef USE_SSE2
  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
      PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
      PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      default:
        break;
    }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    PREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8)
    PREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8)
    PREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8)
    default:
      break;
  }

#undef FORMAT_CASE_CALL

  MOZ_ASSERT(false, "Unsupported premultiply formats");
  return false;
}
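// Illustrative only: a typical call, assuming caller-owned buffers where
// srcStride and dstStride are at least 4 * width bytes:
//   if (!PremultiplyData(srcPixels, srcStride, SurfaceFormat::B8G8R8A8,
//                        dstPixels, dstStride, SurfaceFormat::R8G8B8A8,
//                        IntSize(width, height))) {
//     // Unsupported format pair.
//   }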
SwizzleRowFn PremultiplyRow(SurfaceFormat aSrcFormat,
                            SurfaceFormat aDstFormat) {
#ifdef USE_SSE2
  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      default:
        break;
    }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8)
    PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8)
    PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8)
    default:
      break;
  }

  MOZ_ASSERT_UNREACHABLE("Unsupported premultiply formats");
  return nullptr;
}

/**
 * Unpremultiplying
 */

// Generate a table of 8.16 fixed-point reciprocals representing 1/alpha.
#define UNPREMULQ(x) (0xFF00FFU / (x))
#define UNPREMULQ_2(x) UNPREMULQ(x), UNPREMULQ((x) + 1)
#define UNPREMULQ_4(x) UNPREMULQ_2(x), UNPREMULQ_2((x) + 2)
#define UNPREMULQ_8(x) UNPREMULQ_4(x), UNPREMULQ_4((x) + 4)
#define UNPREMULQ_16(x) UNPREMULQ_8(x), UNPREMULQ_8((x) + 8)
#define UNPREMULQ_32(x) UNPREMULQ_16(x), UNPREMULQ_16((x) + 16)
static const uint32_t sUnpremultiplyTable[256] = {0,
                                                  UNPREMULQ(1),
                                                  UNPREMULQ_2(2),
                                                  UNPREMULQ_4(4),
                                                  UNPREMULQ_8(8),
                                                  UNPREMULQ_16(16),
                                                  UNPREMULQ_32(32),
                                                  UNPREMULQ_32(64),
                                                  UNPREMULQ_32(96),
                                                  UNPREMULQ_32(128),
                                                  UNPREMULQ_32(160),
                                                  UNPREMULQ_32(192),
                                                  UNPREMULQ_32(224)};
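// Illustrative only: each table entry is roughly (255 << 16) / alpha in 8.16
// fixed point (the 0xFF00FF constant adds a small rounding bias). To undo a
// premultiply of c = 0x40 at a = 0x80:
//   q  = sUnpremultiplyTable[0x80] = 0xFF00FF / 128 = 130561
//   c' = (0x40 * q) >> 16 = 8355904 >> 16 = 127
// which matches the exact straight-alpha value 64 * 255 / 128 = 127
// (truncated).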
// Fallback unpremultiply implementation that uses 8.16 fixed-point reciprocal
// math to eliminate any division by the alpha component. This optimization is
// used for the SSE2 and NEON implementations, with some adaptations. This
// implementation also accesses color components using individual byte accesses
// as this profiles faster than accessing the pixel as a uint32_t and
// shifting/masking to access components.
template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
          uint32_t aDstRGBIndex, uint32_t aDstAIndex>
static void UnpremultiplyChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                       int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
    uint8_t g = aSrc[aSrcRGBIndex + 1];
    uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 0 : 2)];
    uint8_t a = aSrc[aSrcAIndex];

    // Access the 8.16 reciprocal from the table based on alpha. Multiply by
    // the reciprocal and shift off the fraction bits to approximate the
    // division by alpha.
    uint32_t q = sUnpremultiplyTable[a];
    aDst[aDstRGBIndex + 0] = (r * q) >> 16;
    aDst[aDstRGBIndex + 1] = (g * q) >> 16;
    aDst[aDstRGBIndex + 2] = (b * q) >> 16;
    aDst[aDstAIndex] = a;

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}

template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
          uint32_t aDstRGBIndex, uint32_t aDstAIndex>
static void UnpremultiplyRowFallback(const uint8_t* aSrc, uint8_t* aDst,
                                     int32_t aLength) {
  UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
                             aDstAIndex>(aSrc, aDst, aLength);
}

template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
          uint32_t aDstRGBIndex, uint32_t aDstAIndex>
static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
                                  uint8_t* aDst, int32_t aDstGap,
                                  IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
                               aDstAIndex>(aSrc, aDst, aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat)             \
  FORMAT_CASE(                                                          \
      aSrcFormat, aDstFormat,                                           \
      UnpremultiplyFallback<                                            \
          ShouldSwapRB(aSrcFormat, aDstFormat),                         \
          RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat),         \
          RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)

#define UNPREMULTIPLY_FALLBACK(aSrcFormat)                         \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)

#define UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat)             \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat,                                   \
                  UnpremultiplyRowFallback<                                 \
                      ShouldSwapRB(aSrcFormat, aDstFormat),                 \
                      RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
                      RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)

#define UNPREMULTIPLY_ROW_FALLBACK(aSrcFormat)                         \
  UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)
bool UnpremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
                       SurfaceFormat aSrcFormat, uint8_t* aDst,
                       int32_t aDstStride, SurfaceFormat aDstFormat,
                       const IntSize& aSize) {
  if (aSize.IsEmpty()) {
    return true;
  }
  IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
  // Find gap from end of row to the start of the next row.
  int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
  int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
  if (srcGap < 0 || dstGap < 0) {
    return false;
  }

#define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)

#ifdef USE_SSE2
  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    UNPREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8)
    UNPREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8)
    UNPREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8)
    default:
      break;
  }

#undef FORMAT_CASE_CALL

  MOZ_ASSERT(false, "Unsupported unpremultiply formats");
  return false;
}

SwizzleRowFn UnpremultiplyRow(SurfaceFormat aSrcFormat,
                              SurfaceFormat aDstFormat) {
#ifdef USE_SSE2
  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8)
    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8)
    UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8)
    default:
      break;
  }

  MOZ_ASSERT_UNREACHABLE("Unsupported unpremultiply formats");
  return nullptr;
}
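// Illustrative only: the row functions return a SwizzleRowFn that the caller
// drives one row at a time, e.g. to unpremultiply a BGRA image row by row:
//   SwizzleRowFn fn = UnpremultiplyRow(SurfaceFormat::B8G8R8A8,
//                                      SurfaceFormat::B8G8R8A8);
//   if (fn) {
//     for (int32_t y = 0; y < height; y++) {
//       fn(srcPixels + y * srcStride, dstPixels + y * dstStride, width);
//     }
//   }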
/**
 * Swizzling
 */

// Fallback swizzle implementation that uses shifting and masking to reorder
// pixels.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void SwizzleChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                 int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);

    if (aSwapRB) {
      // Handle R and B swaps by exchanging words and masking.
      uint32_t rb =
          ((rgba << 16) | (rgba >> 16)) & (0x00FF00FF << aSrcRGBShift);
      uint32_t ga = rgba & ((0xFF << aSrcAShift) | (0xFF00 << aSrcRGBShift));
      rgba = rb | ga;
    }

    // If src and dst shifts differ, rotate left or right to move RGB into
    // place, i.e. ARGB -> RGBA or RGBA -> ARGB.
    if (aDstRGBShift > aSrcRGBShift) {
      rgba = (rgba << 8) | (aOpaqueAlpha ? 0x000000FF : rgba >> 24);
    } else if (aSrcRGBShift > aDstRGBShift) {
      rgba = (rgba >> 8) | (aOpaqueAlpha ? 0xFF000000 : rgba << 24);
    } else if (aOpaqueAlpha) {
      rgba |= 0xFF << aDstAShift;
    }

    *reinterpret_cast<uint32_t*>(aDst) = rgba;

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}

template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void SwizzleRowFallback(const uint8_t* aSrc, uint8_t* aDst,
                               int32_t aLength) {
  SwizzleChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
                       aDstRGBShift, aDstAShift>(aSrc, aDst, aLength);
}

template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void SwizzleFallback(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                            int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    SwizzleChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
                         aDstRGBShift, aDstAShift>(aSrc, aDst, aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define SWIZZLE_FALLBACK(aSrcFormat, aDstFormat)                          \
  FORMAT_CASE(                                                            \
      aSrcFormat, aDstFormat,                                             \
      SwizzleFallback<ShouldSwapRB(aSrcFormat, aDstFormat),               \
                      ShouldForceOpaque(aSrcFormat, aDstFormat),          \
                      RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                      RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

#define SWIZZLE_ROW_FALLBACK(aSrcFormat, aDstFormat)                         \
  FORMAT_CASE_ROW(                                                           \
      aSrcFormat, aDstFormat,                                                \
      SwizzleRowFallback<ShouldSwapRB(aSrcFormat, aDstFormat),               \
                         ShouldForceOpaque(aSrcFormat, aDstFormat),          \
                         RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                         RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

// Fast-path for matching formats.
template <int32_t aBytesPerPixel>
static void SwizzleRowCopy(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  if (aSrc != aDst) {
    memcpy(aDst, aSrc, aLength * aBytesPerPixel);
  }
}

// Fast-path for matching formats.
static void SwizzleCopy(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize, int32_t aBPP) {
  if (aSrc != aDst) {
    int32_t rowLength = aBPP * aSize.width;
    for (int32_t height = aSize.height; height > 0; height--) {
      memcpy(aDst, aSrc, rowLength);
      aSrc += rowLength + aSrcGap;
      aDst += rowLength + aDstGap;
    }
  }
}

// Fast-path for conversions that swap all bytes.
template <bool aOpaqueAlpha, uint32_t aDstAShift>
static void SwizzleChunkSwap(const uint8_t*& aSrc, uint8_t*& aDst,
                             int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    // Use an endian swap to move the bytes, i.e. BGRA -> ARGB.
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
#if MOZ_LITTLE_ENDIAN()
    rgba = NativeEndian::swapToBigEndian(rgba);
#else
    rgba = NativeEndian::swapToLittleEndian(rgba);
#endif
    if (aOpaqueAlpha) {
      rgba |= 0xFF << aDstAShift;
    }
    *reinterpret_cast<uint32_t*>(aDst) = rgba;
    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}

template <bool aOpaqueAlpha, uint32_t aDstAShift>
static void SwizzleRowSwap(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  SwizzleChunkSwap<aOpaqueAlpha, aDstAShift>(aSrc, aDst, aLength);
}

template <bool aOpaqueAlpha, uint32_t aDstAShift>
static void SwizzleSwap(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    SwizzleChunkSwap<aOpaqueAlpha, aDstAShift>(aSrc, aDst, aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define SWIZZLE_SWAP(aSrcFormat, aDstFormat)                 \
  FORMAT_CASE(                                               \
      aSrcFormat, aDstFormat,                                \
      SwizzleSwap<ShouldForceOpaque(aSrcFormat, aDstFormat), \
                  AlphaBitShift(aDstFormat)>)

#define SWIZZLE_ROW_SWAP(aSrcFormat, aDstFormat)                \
  FORMAT_CASE_ROW(                                              \
      aSrcFormat, aDstFormat,                                   \
      SwizzleRowSwap<ShouldForceOpaque(aSrcFormat, aDstFormat), \
                     AlphaBitShift(aDstFormat)>)

static void SwizzleChunkSwapRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                                  int32_t aLength) {
  const uint8_t* end = aSrc + 3 * aLength;
  do {
    uint8_t r = aSrc[0];
    uint8_t g = aSrc[1];
    uint8_t b = aSrc[2];
    aDst[0] = b;
    aDst[1] = g;
    aDst[2] = r;
    aSrc += 3;
    aDst += 3;
  } while (aSrc < end);
}

static void SwizzleRowSwapRGB24(const uint8_t* aSrc, uint8_t* aDst,
                                int32_t aLength) {
  SwizzleChunkSwapRGB24(aSrc, aDst, aLength);
}

static void SwizzleSwapRGB24(const uint8_t* aSrc, int32_t aSrcGap,
                             uint8_t* aDst, int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    SwizzleChunkSwapRGB24(aSrc, aDst, aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define SWIZZLE_SWAP_RGB24(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleSwapRGB24)

#define SWIZZLE_ROW_SWAP_RGB24(aSrcFormat, aDstFormat) \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat, SwizzleRowSwapRGB24)
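// Illustrative only: on a little-endian machine a B8G8R8A8 pixel with bytes
// [B, G, R, A] in memory loads as the uint32_t 0xAARRGGBB. Byte-swapping the
// value gives 0xBBGGRRAA, which stores back as the bytes [A, R, G, B], i.e.
// an A8R8G8B8 pixel. That is why a single endian swap covers every
// BGRA <-> ARGB style conversion above.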
// Fast-path for conversions that force alpha to opaque.
template <uint32_t aDstAShift>
static void SwizzleChunkOpaqueUpdate(uint8_t*& aBuffer, int32_t aLength) {
  const uint8_t* end = aBuffer + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aBuffer);
    // Just add on the alpha bits to the source.
    rgba |= 0xFF << aDstAShift;
    *reinterpret_cast<uint32_t*>(aBuffer) = rgba;
    aBuffer += 4;
  } while (aBuffer < end);
}

template <uint32_t aDstAShift>
static void SwizzleChunkOpaqueCopy(const uint8_t*& aSrc, uint8_t*& aDst,
                                   int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
    // Just add on the alpha bits to the source.
    rgba |= 0xFF << aDstAShift;
    *reinterpret_cast<uint32_t*>(aDst) = rgba;
    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}

template <uint32_t aDstAShift>
static void SwizzleRowOpaque(const uint8_t* aSrc, uint8_t* aDst,
                             int32_t aLength) {
  if (aSrc == aDst) {
    SwizzleChunkOpaqueUpdate<aDstAShift>(aDst, aLength);
  } else {
    SwizzleChunkOpaqueCopy<aDstAShift>(aSrc, aDst, aLength);
  }
}

template <uint32_t aDstAShift>
static void SwizzleOpaque(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                          int32_t aDstGap, IntSize aSize) {
  if (aSrc == aDst) {
    // Modifying in-place, so just write out the alpha.
    for (int32_t height = aSize.height; height > 0; height--) {
      SwizzleChunkOpaqueUpdate<aDstAShift>(aDst, aSize.width);
      aDst += aDstGap;
    }
  } else {
    for (int32_t height = aSize.height; height > 0; height--) {
      SwizzleChunkOpaqueCopy<aDstAShift>(aSrc, aDst, aSize.width);
      aSrc += aSrcGap;
      aDst += aDstGap;
    }
  }
}

#define SWIZZLE_OPAQUE(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleOpaque<AlphaBitShift(aDstFormat)>)

#define SWIZZLE_ROW_OPAQUE(aSrcFormat, aDstFormat) \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat,          \
                  SwizzleRowOpaque<AlphaBitShift(aDstFormat)>)

// Packing of 32-bit formats to RGB565.
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackToRGB565(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                         int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    const uint8_t* end = aSrc + 4 * aSize.width;
    do {
      uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);

      // Isolate the R, G, and B components and shift to final
      // endian-dependent locations.
      uint16_t rgb565;
      if (aSwapRB) {
        rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) << (8 - aSrcRGBShift)) |
                 ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) |
                 ((rgba & (0xF80000 << aSrcRGBShift)) >> (19 + aSrcRGBShift));
      } else {
        rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) >> (3 + aSrcRGBShift)) |
                 ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) |
                 ((rgba & (0xF80000 << aSrcRGBShift)) >> (8 + aSrcRGBShift));
      }

      *reinterpret_cast<uint16_t*>(aDst) = rgb565;

      aSrc += 4;
      aDst += 2;
    } while (aSrc < end);

    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}
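// Illustrative only: for an opaque red B8G8R8A8 pixel on little-endian, the
// bytes [0x00, 0x00, 0xFF, 0xFF] load as rgba = 0xFFFF0000 (aSrcRGBShift = 0,
// no swap). The non-swapped path above computes
//   rgb565 = (0x00 >> 3) | (0x0000 >> 5) | (0xF80000 >> 8) = 0xF800
// packing the top 5 bits of R into bits 11-15, the top 6 bits of G into bits
// 5-10, and the top 5 bits of B into bits 0-4 of the R5G6B5_UINT16 value.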
// Packing of 32-bit formats to 24-bit formats.
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackChunkToRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                             int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
    uint8_t g = aSrc[aSrcRGBIndex + 1];
    uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 0 : 2)];

    aDst[0] = r;
    aDst[1] = g;
    aDst[2] = b;

    aSrc += 4;
    aDst += 3;
  } while (aSrc < end);
}

template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackRowToRGB24(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst, aLength);
}

template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst,
                                                          aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define PACK_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc)                    \
  FORMAT_CASE(aSrcFormat, aDstFormat,                                       \
              aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat),               \
                        RGBBitShift(aSrcFormat), RGBByteIndex(aSrcFormat)>)

#define PACK_RGB(aDstFormat, aPackFunc)                         \
  PACK_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)

#define PACK_ROW_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc)           \
  FORMAT_CASE_ROW(                                                     \
      aSrcFormat, aDstFormat,                                          \
      aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat),                  \
                RGBBitShift(aSrcFormat), RGBByteIndex(aSrcFormat)>)

#define PACK_ROW_RGB(aDstFormat, aPackFunc)                         \
  PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)

// Packing of 32-bit formats to A8.
template <uint32_t aSrcAIndex>
static void PackToA8(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                     int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    const uint8_t* end = aSrc + 4 * aSize.width;
    do {
      *aDst++ = aSrc[aSrcAIndex];
      aSrc += 4;
    } while (aSrc < end);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define PACK_ALPHA_CASE(aSrcFormat, aDstFormat, aPackFunc) \
  FORMAT_CASE(aSrcFormat, aDstFormat, aPackFunc<AlphaByteIndex(aSrcFormat)>)

#define PACK_ALPHA(aDstFormat, aPackFunc)                         \
  PACK_ALPHA_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc)

template <bool aSwapRB>
void UnpackRowRGB24(const uint8_t* aSrc, uint8_t* aDst, int32_t aLength) {
  // Because we are expanding, we can only process the data back to front in
  // case we are performing this in place.
  const uint8_t* src = aSrc + 3 * (aLength - 1);
  uint32_t* dst = reinterpret_cast<uint32_t*>(aDst + 4 * aLength);
  while (src >= aSrc) {
    uint8_t r = src[aSwapRB ? 2 : 0];
    uint8_t g = src[1];
    uint8_t b = src[aSwapRB ? 0 : 2];
#if MOZ_LITTLE_ENDIAN()
    *--dst = 0xFF000000 | (b << 16) | (g << 8) | r;
#else
    *--dst = 0x000000FF | (b << 8) | (g << 16) | (r << 24);
#endif
    src -= 3;
  }
}

// Force instantiation of swizzle variants here.
template void UnpackRowRGB24<false>(const uint8_t*, uint8_t*, int32_t);
template void UnpackRowRGB24<true>(const uint8_t*, uint8_t*, int32_t);

#define UNPACK_ROW_RGB(aDstFormat)                                    \
  FORMAT_CASE_ROW(                                                    \
      SurfaceFormat::R8G8B8, aDstFormat,                              \
      UnpackRowRGB24<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
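// Illustrative only: why the unpack above walks backwards. For an in-place
// expansion of a 2-pixel row, the buffer starts as
//   [R0 G0 B0 R1 G1 B1 .. ..]
// Writing pixel 0 first would store 4 bytes at offsets 0..3 and clobber R1
// before it is read. Writing pixel 1 first stores to offsets 4..7, which
// never overlap un-read 3-byte source data, so the row can grow in place.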
static void UnpackRowRGB24_To_ARGB(const uint8_t* aSrc, uint8_t* aDst,
                                   int32_t aLength) {
  // Because we are expanding, we can only process the data back to front in
  // case we are performing this in place.
  const uint8_t* src = aSrc + 3 * (aLength - 1);
  uint32_t* dst = reinterpret_cast<uint32_t*>(aDst + 4 * aLength);
  while (src >= aSrc) {
    uint8_t r = src[0];
    uint8_t g = src[1];
    uint8_t b = src[2];
#if MOZ_LITTLE_ENDIAN()
    *--dst = 0x000000FF | (r << 8) | (g << 16) | (b << 24);
#else
    *--dst = 0xFF000000 | (r << 16) | (g << 8) | b;
#endif
    src -= 3;
  }
}

#define UNPACK_ROW_RGB_TO_ARGB(aDstFormat) \
  FORMAT_CASE_ROW(SurfaceFormat::R8G8B8, aDstFormat, UnpackRowRGB24_To_ARGB)

bool SwizzleData(const uint8_t* aSrc, int32_t aSrcStride,
                 SurfaceFormat aSrcFormat, uint8_t* aDst, int32_t aDstStride,
                 SurfaceFormat aDstFormat, const IntSize& aSize) {
  if (aSize.IsEmpty()) {
    return true;
  }
  IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
  // Find gap from end of row to the start of the next row.
  int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
  int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
  if (srcGap < 0 || dstGap < 0) {
    return false;
  }

#define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)

#ifdef USE_SSE2
  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
    SWIZZLE_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8)

    PACK_RGB(SurfaceFormat::R5G6B5_UINT16, PackToRGB565)
    PACK_RGB(SurfaceFormat::B8G8R8, PackToRGB24)
    PACK_RGB(SurfaceFormat::R8G8B8, PackToRGB24)
    PACK_ALPHA(SurfaceFormat::A8, PackToA8)

    default:
      break;
  }

  if (aSrcFormat == aDstFormat) {
    // If the formats match, just do a generic copy.
    SwizzleCopy(aSrc, srcGap, aDst, dstGap, size, BytesPerPixel(aSrcFormat));
    return true;
  }

#undef FORMAT_CASE_CALL

  MOZ_ASSERT(false, "Unsupported swizzle formats");
  return false;
}
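// Illustrative only: converting a decoded BGRA image to RGBA in one call,
// reusing the same buffer for source and destination (the 32-bit to 32-bit
// paths read each pixel before writing it, so same-position updates are
// safe):
//   SwizzleData(pixels, stride, SurfaceFormat::B8G8R8A8,
//               pixels, stride, SurfaceFormat::R8G8B8A8,
//               IntSize(width, height));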
static bool SwizzleYFlipDataInternal(const uint8_t* aSrc, int32_t aSrcStride,
                                     SurfaceFormat aSrcFormat, uint8_t* aDst,
                                     int32_t aDstStride,
                                     SurfaceFormat aDstFormat,
                                     const IntSize& aSize,
                                     SwizzleRowFn aSwizzleFn) {
  if (!aSwizzleFn) {
    return false;
  }

  // Guarantee our width and height are both greater than zero.
  if (aSize.IsEmpty()) {
    return true;
  }

  // Unlike SwizzleData/PremultiplyData, we don't use the stride gaps directly,
  // but we can use them to verify that the stride is valid for our width and
  // format.
  int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
  int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
  if (srcGap < 0 || dstGap < 0) {
    return false;
  }

  // Swapping/swizzling to a new buffer is trivial.
  if (aSrc != aDst) {
    const uint8_t* src = aSrc;
    const uint8_t* srcEnd = aSrc + aSize.height * aSrcStride;
    uint8_t* dst = aDst + (aSize.height - 1) * aDstStride;
    while (src < srcEnd) {
      aSwizzleFn(src, dst, aSize.width);
      src += aSrcStride;
      dst -= aDstStride;
    }
    return true;
  }

  if (aSrcStride != aDstStride) {
    return false;
  }

  // If we are swizzling in place, then we need a temporary row buffer.
  UniquePtr<uint8_t[]> rowBuffer(new (std::nothrow) uint8_t[aDstStride]);
  if (!rowBuffer) {
    return false;
  }

  // Swizzle and swap the top and bottom rows until we meet in the middle.
  int32_t middleRow = aSize.height / 2;
  uint8_t* top = aDst;
  uint8_t* bottom = aDst + (aSize.height - 1) * aDstStride;
  for (int32_t row = 0; row < middleRow; ++row) {
    memcpy(rowBuffer.get(), bottom, aDstStride);
    aSwizzleFn(top, bottom, aSize.width);
    aSwizzleFn(rowBuffer.get(), top, aSize.width);
    top += aDstStride;
    bottom -= aDstStride;
  }

  // If there is an odd numbered row, we haven't swizzled it yet.
  if (aSize.height % 2 == 1) {
    top = aDst + middleRow * aDstStride;
    aSwizzleFn(top, top, aSize.width);
  }

  return true;
}
bool SwizzleYFlipData(const uint8_t* aSrc, int32_t aSrcStride,
                      SurfaceFormat aSrcFormat, uint8_t* aDst,
                      int32_t aDstStride, SurfaceFormat aDstFormat,
                      const IntSize& aSize) {
  return SwizzleYFlipDataInternal(aSrc, aSrcStride, aSrcFormat, aDst,
                                  aDstStride, aDstFormat, aSize,
                                  SwizzleRow(aSrcFormat, aDstFormat));
}

bool PremultiplyYFlipData(const uint8_t* aSrc, int32_t aSrcStride,
                          SurfaceFormat aSrcFormat, uint8_t* aDst,
                          int32_t aDstStride, SurfaceFormat aDstFormat,
                          const IntSize& aSize) {
  return SwizzleYFlipDataInternal(aSrc, aSrcStride, aSrcFormat, aDst,
                                  aDstStride, aDstFormat, aSize,
                                  PremultiplyRow(aSrcFormat, aDstFormat));
}
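// Illustrative only: a decoder that produces bottom-up RGBA rows can convert
// to a top-down premultiplied BGRA surface in a single pass:
//   PremultiplyYFlipData(decoded, decodedStride, SurfaceFormat::R8G8B8A8,
//                        surface, surfaceStride, SurfaceFormat::B8G8R8A8,
//                        IntSize(width, height));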
SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) {
#ifdef USE_SSE2
  if (mozilla::supports_avx2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::B8G8R8A8)
      default:
        break;
    }

  if (mozilla::supports_ssse3()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::B8G8R8A8)
      default:
        break;
    }

  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_NEON(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_ROW_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8)

    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
    SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

    UNPACK_ROW_RGB(SurfaceFormat::R8G8B8X8)
    UNPACK_ROW_RGB(SurfaceFormat::R8G8B8A8)
    UNPACK_ROW_RGB(SurfaceFormat::B8G8R8X8)
    UNPACK_ROW_RGB(SurfaceFormat::B8G8R8A8)
    UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::A8R8G8B8)
    UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::X8R8G8B8)

    PACK_ROW_RGB(SurfaceFormat::R8G8B8, PackRowToRGB24)
    PACK_ROW_RGB(SurfaceFormat::B8G8R8, PackRowToRGB24)

    default:
      break;
  }

  if (aSrcFormat == aDstFormat) {
    switch (BytesPerPixel(aSrcFormat)) {
      case 4:
        return &SwizzleRowCopy<4>;
      case 3:
        return &SwizzleRowCopy<3>;
      default:
        break;
    }
  }

  MOZ_ASSERT_UNREACHABLE("Unsupported swizzle formats");
  return nullptr;
}

static IntRect ReorientRowRotate0FlipFallback(const uint8_t* aSrc,
                                              int32_t aSrcRow, uint8_t* aDst,
                                              const IntSize& aDstSize,
                                              int32_t aDstStride) {
  // Reverse order of pixels in the row.
  const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
  const uint32_t* end = src + aDstSize.width;
  uint32_t* dst = reinterpret_cast<uint32_t*>(aDst + aSrcRow * aDstStride) +
                  aDstSize.width - 1;
  do {
    *dst-- = *src++;
  } while (src < end);

  return IntRect(0, aSrcRow, aDstSize.width, 1);
}

static IntRect ReorientRowRotate90FlipFallback(const uint8_t* aSrc,
                                               int32_t aSrcRow, uint8_t* aDst,
                                               const IntSize& aDstSize,
                                               int32_t aDstStride) {
  // Copy row of pixels from top to bottom, into left to right columns.
  const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
  const uint32_t* end = src + aDstSize.height;
  uint32_t* dst = reinterpret_cast<uint32_t*>(aDst) + aSrcRow;
  int32_t stride = aDstStride / sizeof(uint32_t);
  do {
    *dst = *src++;
    dst += stride;
  } while (src < end);

  return IntRect(aSrcRow, 0, 1, aDstSize.height);
}
static IntRect ReorientRowRotate180FlipFallback(const uint8_t* aSrc,
                                                int32_t aSrcRow, uint8_t* aDst,
                                                const IntSize& aDstSize,
                                                int32_t aDstStride) {
  // Copy row of pixels from top to bottom, into bottom to top rows.
  uint8_t* dst = aDst + (aDstSize.height - aSrcRow - 1) * aDstStride;
  memcpy(dst, aSrc, aDstSize.width * sizeof(uint32_t));

  return IntRect(0, aDstSize.height - aSrcRow - 1, aDstSize.width, 1);
}

static IntRect ReorientRowRotate270FlipFallback(const uint8_t* aSrc,
                                                int32_t aSrcRow, uint8_t* aDst,
                                                const IntSize& aDstSize,
                                                int32_t aDstStride) {
  // Copy row of pixels in reverse order from top to bottom, into right to left
  // columns.
  const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
  const uint32_t* end = src + aDstSize.height;
  uint32_t* dst =
      reinterpret_cast<uint32_t*>(aDst + (aDstSize.height - 1) * aDstStride) +
      aDstSize.width - aSrcRow - 1;
  int32_t stride = aDstStride / sizeof(uint32_t);
  do {
    *dst = *src++;
    dst -= stride;
  } while (src < end);

  return IntRect(aDstSize.width - aSrcRow - 1, 0, 1, aDstSize.height);
}

static IntRect ReorientRowRotate0Fallback(const uint8_t* aSrc, int32_t aSrcRow,
                                          uint8_t* aDst,
                                          const IntSize& aDstSize,
                                          int32_t aDstStride) {
  // Copy row of pixels into the destination.
  uint8_t* dst = aDst + aSrcRow * aDstStride;
  memcpy(dst, aSrc, aDstSize.width * sizeof(uint32_t));

  return IntRect(0, aSrcRow, aDstSize.width, 1);
}

static IntRect ReorientRowRotate90Fallback(const uint8_t* aSrc, int32_t aSrcRow,
                                           uint8_t* aDst,
                                           const IntSize& aDstSize,
                                           int32_t aDstStride) {
  // Copy row of pixels from top to bottom, into right to left columns.
  const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
  const uint32_t* end = src + aDstSize.height;
  uint32_t* dst =
      reinterpret_cast<uint32_t*>(aDst) + aDstSize.width - aSrcRow - 1;
  int32_t stride = aDstStride / sizeof(uint32_t);
  do {
    *dst = *src++;
    dst += stride;
  } while (src < end);

  return IntRect(aDstSize.width - aSrcRow - 1, 0, 1, aDstSize.height);
}

static IntRect ReorientRowRotate180Fallback(const uint8_t* aSrc,
                                            int32_t aSrcRow, uint8_t* aDst,
                                            const IntSize& aDstSize,
                                            int32_t aDstStride) {
  // Copy row of pixels in reverse order from top to bottom, into bottom to top
  // rows.
  const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
  const uint32_t* end = src + aDstSize.width;
  uint32_t* dst = reinterpret_cast<uint32_t*>(
                      aDst + (aDstSize.height - aSrcRow - 1) * aDstStride) +
                  aDstSize.width - 1;
  do {
    *dst-- = *src++;
  } while (src < end);

  return IntRect(0, aDstSize.height - aSrcRow - 1, aDstSize.width, 1);
}

static IntRect ReorientRowRotate270Fallback(const uint8_t* aSrc,
                                            int32_t aSrcRow, uint8_t* aDst,
                                            const IntSize& aDstSize,
                                            int32_t aDstStride) {
  // Copy row of pixels in reverse order from top to bottom, into left to right
  // columns.
  const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
  const uint32_t* end = src + aDstSize.height;
  uint32_t* dst =
      reinterpret_cast<uint32_t*>(aDst + (aDstSize.height - 1) * aDstStride) +
      aSrcRow;
  int32_t stride = aDstStride / sizeof(uint32_t);
  do {
    *dst = *src++;
    dst -= stride;
  } while (src < end);

  return IntRect(aSrcRow, 0, 1, aDstSize.height);
}
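// Illustrative only: ReorientRow below returns a per-row function that a
// decoder can call as each source row arrives; the returned IntRect is the
// destination region that row touched (a row for 0/180 degrees, a column for
// 90/270), which can drive incremental invalidation:
//   ReorientRowFn reorient = ReorientRow(orientation);
//   for (int32_t row = 0; row < srcHeight; row++) {
//     IntRect dirty = reorient(srcRows + row * srcStride, row, dstPixels,
//                              dstSize, dstStride);
//     // invalidate(dirty);
//   }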
ReorientRowFn ReorientRow(const struct image::Orientation& aOrientation) {
  switch (aOrientation.flip) {
    case image::Flip::Unflipped:
      switch (aOrientation.rotation) {
        case image::Angle::D0:
          return &ReorientRowRotate0Fallback;
        case image::Angle::D90:
          return &ReorientRowRotate90Fallback;
        case image::Angle::D180:
          return &ReorientRowRotate180Fallback;
        case image::Angle::D270:
          return &ReorientRowRotate270Fallback;
        default:
          break;
      }
      break;
    case image::Flip::Horizontal:
      switch (aOrientation.rotation) {
        case image::Angle::D0:
          return &ReorientRowRotate0FlipFallback;
        case image::Angle::D90:
          if (aOrientation.flipFirst) {
            return &ReorientRowRotate270FlipFallback;
          } else {
            return &ReorientRowRotate90FlipFallback;
          }
        case image::Angle::D180:
          return &ReorientRowRotate180FlipFallback;
        case image::Angle::D270:
          if (aOrientation.flipFirst) {
            return &ReorientRowRotate90FlipFallback;
          } else {
            return &ReorientRowRotate270FlipFallback;
          }
        default:
          break;
      }
      break;
    default:
      break;
  }
  MOZ_ASSERT_UNREACHABLE("Unhandled orientation!");
  return nullptr;
}

}  // namespace gfx
}  // namespace mozilla