summaryrefslogtreecommitdiffstats
path: root/media/libvpx/libvpx/third_party
diff options
context:
space:
mode:
Diffstat (limited to 'media/libvpx/libvpx/third_party')
-rw-r--r--media/libvpx/libvpx/third_party/googletest/README.libvpx29
-rw-r--r--media/libvpx/libvpx/third_party/googletest/gtest.mk1
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/.clang-format4
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/CONTRIBUTORS65
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/LICENSE28
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/README.md217
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-assertion-result.h237
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-death-test.h345
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-matchers.h956
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-message.h218
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-param-test.h510
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-printers.h1048
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-spi.h248
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-test-part.h190
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-typed-test.h331
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest.h2297
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_pred_impl.h279
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_prod.h60
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/README.md44
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-port.h68
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-printers.h42
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest.h37
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-death-test-internal.h306
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-filepath.h210
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-internal.h1570
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util.h956
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port-arch.h116
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port.h2413
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-string.h177
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h186
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest-all.cc49
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest-assertion-result.cc77
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest-death-test.cc1620
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest-filepath.cc367
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest-internal-inl.h1212
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest-matchers.cc98
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest-port.cc1394
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest-printers.cc553
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest-test-part.cc105
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest-typed-test.cc104
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest.cc6795
-rw-r--r--media/libvpx/libvpx/third_party/googletest/src/src/gtest_main.cc53
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/LICENSE29
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/README.libvpx23
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/basic_types.h65
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/compare.h111
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert.h406
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_argb.h687
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from.h342
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from_argb.h287
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/cpu_id.h119
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/macros_msa.h233
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/mjpeg_decoder.h195
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/planar_functions.h847
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate.h164
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_argb.h37
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_row.h194
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/row.h3471
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale.h131
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_argb.h76
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_row.h944
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/version.h16
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/include/libyuv/video_common.h188
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/compare.cc429
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/compare_common.cc104
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/compare_gcc.cc360
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/compare_msa.cc97
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/compare_neon.cc96
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/compare_neon64.cc90
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/compare_win.cc241
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/convert.cc1740
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/convert_argb.cc2231
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/convert_from.cc1429
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/convert_from_argb.cc1617
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/convert_jpeg.cc332
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/convert_to_argb.cc291
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/convert_to_i420.cc277
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/cpu_id.cc276
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/mjpeg_decoder.cc573
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/mjpeg_validate.cc70
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/planar_functions.cc3587
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/rotate.cc514
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/rotate_any.cc73
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/rotate_argb.cc224
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/rotate_common.cc106
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/rotate_gcc.cc374
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/rotate_msa.cc250
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/rotate_neon.cc416
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/rotate_neon64.cc426
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/rotate_win.cc252
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/row_any.cc1211
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/row_common.cc3237
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/row_gcc.cc6677
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/row_msa.cc3512
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/row_neon.cc2693
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/row_neon64.cc2884
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/row_win.cc6234
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/scale.cc1741
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/scale_any.cc464
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/scale_argb.cc1010
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/scale_common.cc1323
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/scale_gcc.cc1374
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/scale_msa.cc949
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/scale_neon.cc970
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/scale_neon64.cc1064
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/scale_win.cc1391
-rw-r--r--media/libvpx/libvpx/third_party/libyuv/source/video_common.cc62
-rw-r--r--media/libvpx/libvpx/third_party/x86inc/LICENSE18
-rw-r--r--media/libvpx/libvpx/third_party/x86inc/README.libvpx19
-rw-r--r--media/libvpx/libvpx/third_party/x86inc/x86inc.asm1923
110 files changed, 89411 insertions, 0 deletions
diff --git a/media/libvpx/libvpx/third_party/googletest/README.libvpx b/media/libvpx/libvpx/third_party/googletest/README.libvpx
new file mode 100644
index 0000000000..5f6b01b0ec
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/README.libvpx
@@ -0,0 +1,29 @@
+URL: https://github.com/google/googletest.git
+Version: release-1.12.1
+License: BSD
+License File: LICENSE
+
+Description:
+Google's framework for writing C++ tests on a variety of platforms
+(Linux, Mac OS X, Windows, Windows CE, Symbian, etc). Based on the
+xUnit architecture. Supports automatic test discovery, a rich set of
+assertions, user-defined assertions, death tests, fatal and non-fatal
+failures, various options for running the tests, and XML test report
+generation.
+
+Local Modifications:
+- Remove everything but:
+ .clang-format
+ CONTRIBUTORS
+ googletest/
+ include
+ README.md
+ src
+ LICENSE
+- Move .clang-format, CONTRIBUTORS, and LICENSE into googletest/
+- In googletest/include/gtest/internal/custom/gtest-port.h, define
+ GTEST_HAS_NOTIFICATION_ as 1 and use a stub Notification class to fix
+ the mingw32 g++ compilation errors caused by the lack of std::mutex
+ and std::condition_variable in the <mutex> and <condition_variable>
+ headers if mingw32 is configured with the win32 threads option. See
+ https://stackoverflow.com/questions/17242516/mingw-w64-threads-posix-vs-win32
diff --git a/media/libvpx/libvpx/third_party/googletest/gtest.mk b/media/libvpx/libvpx/third_party/googletest/gtest.mk
new file mode 100644
index 0000000000..0de3113c7a
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/gtest.mk
@@ -0,0 +1 @@
+GTEST_SRCS-yes += src/gtest-all.cc
diff --git a/media/libvpx/libvpx/third_party/googletest/src/.clang-format b/media/libvpx/libvpx/third_party/googletest/src/.clang-format
new file mode 100644
index 0000000000..5b9bfe6d22
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/.clang-format
@@ -0,0 +1,4 @@
+# Run manually to reformat a file:
+# clang-format -i --style=file <file>
+Language: Cpp
+BasedOnStyle: Google
diff --git a/media/libvpx/libvpx/third_party/googletest/src/CONTRIBUTORS b/media/libvpx/libvpx/third_party/googletest/src/CONTRIBUTORS
new file mode 100644
index 0000000000..77397a5b53
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/CONTRIBUTORS
@@ -0,0 +1,65 @@
+# This file contains a list of people who've made non-trivial
+# contribution to the Google C++ Testing Framework project. People
+# who commit code to the project are encouraged to add their names
+# here. Please keep the list sorted by first names.
+
+Ajay Joshi <jaj@google.com>
+Balázs Dán <balazs.dan@gmail.com>
+Benoit Sigoure <tsuna@google.com>
+Bharat Mediratta <bharat@menalto.com>
+Bogdan Piloca <boo@google.com>
+Chandler Carruth <chandlerc@google.com>
+Chris Prince <cprince@google.com>
+Chris Taylor <taylorc@google.com>
+Dan Egnor <egnor@google.com>
+Dave MacLachlan <dmaclach@gmail.com>
+David Anderson <danderson@google.com>
+Dean Sturtevant
+Eric Roman <eroman@chromium.org>
+Gene Volovich <gv@cite.com>
+Hady Zalek <hady.zalek@gmail.com>
+Hal Burch <gmock@hburch.com>
+Jeffrey Yasskin <jyasskin@google.com>
+Jim Keller <jimkeller@google.com>
+Joe Walnes <joe@truemesh.com>
+Jon Wray <jwray@google.com>
+Jói Sigurðsson <joi@google.com>
+Keir Mierle <mierle@gmail.com>
+Keith Ray <keith.ray@gmail.com>
+Kenton Varda <kenton@google.com>
+Kostya Serebryany <kcc@google.com>
+Krystian Kuzniarek <krystian.kuzniarek@gmail.com>
+Lev Makhlis
+Manuel Klimek <klimek@google.com>
+Mario Tanev <radix@google.com>
+Mark Paskin
+Markus Heule <markus.heule@gmail.com>
+Martijn Vels <mvels@google.com>
+Matthew Simmons <simmonmt@acm.org>
+Mika Raento <mikie@iki.fi>
+Mike Bland <mbland@google.com>
+Miklós Fazekas <mfazekas@szemafor.com>
+Neal Norwitz <nnorwitz@gmail.com>
+Nermin Ozkiranartli <nermin@google.com>
+Owen Carlsen <ocarlsen@google.com>
+Paneendra Ba <paneendra@google.com>
+Pasi Valminen <pasi.valminen@gmail.com>
+Patrick Hanna <phanna@google.com>
+Patrick Riley <pfr@google.com>
+Paul Menage <menage@google.com>
+Peter Kaminski <piotrk@google.com>
+Piotr Kaminski <piotrk@google.com>
+Preston Jackson <preston.a.jackson@gmail.com>
+Rainer Klaffenboeck <rainer.klaffenboeck@dynatrace.com>
+Russ Cox <rsc@google.com>
+Russ Rufer <russ@pentad.com>
+Sean Mcafee <eefacm@gmail.com>
+Sigurður Ásgeirsson <siggi@google.com>
+Sverre Sundsdal <sundsdal@gmail.com>
+Szymon Sobik <sobik.szymon@gmail.com>
+Takeshi Yoshino <tyoshino@google.com>
+Tracy Bialik <tracy@pentad.com>
+Vadim Berman <vadimb@google.com>
+Vlad Losev <vladl@google.com>
+Wolfgang Klier <wklier@google.com>
+Zhanyong Wan <wan@google.com>
diff --git a/media/libvpx/libvpx/third_party/googletest/src/LICENSE b/media/libvpx/libvpx/third_party/googletest/src/LICENSE
new file mode 100644
index 0000000000..1941a11f8c
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/LICENSE
@@ -0,0 +1,28 @@
+Copyright 2008, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/media/libvpx/libvpx/third_party/googletest/src/README.md b/media/libvpx/libvpx/third_party/googletest/src/README.md
new file mode 100644
index 0000000000..d26b309ed0
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/README.md
@@ -0,0 +1,217 @@
+### Generic Build Instructions
+
+#### Setup
+
+To build GoogleTest and your tests that use it, you need to tell your build
+system where to find its headers and source files. The exact way to do it
+depends on which build system you use, and is usually straightforward.
+
+### Build with CMake
+
+GoogleTest comes with a CMake build script
+([CMakeLists.txt](https://github.com/google/googletest/blob/master/CMakeLists.txt))
+that can be used on a wide range of platforms ("C" stands for cross-platform.).
+If you don't have CMake installed already, you can download it for free from
+<http://www.cmake.org/>.
+
+CMake works by generating native makefiles or build projects that can be used in
+the compiler environment of your choice. You can either build GoogleTest as a
+standalone project or it can be incorporated into an existing CMake build for
+another project.
+
+#### Standalone CMake Project
+
+When building GoogleTest as a standalone project, the typical workflow starts
+with
+
+```
+git clone https://github.com/google/googletest.git -b release-1.11.0
+cd googletest # Main directory of the cloned repository.
+mkdir build # Create a directory to hold the build output.
+cd build
+cmake .. # Generate native build scripts for GoogleTest.
+```
+
+The above command also includes GoogleMock by default. And so, if you want to
+build only GoogleTest, you should replace the last command with
+
+```
+cmake .. -DBUILD_GMOCK=OFF
+```
+
+If you are on a \*nix system, you should now see a Makefile in the current
+directory. Just type `make` to build GoogleTest. And then you can simply install
+GoogleTest if you are a system administrator.
+
+```
+make
+sudo make install # Install in /usr/local/ by default
+```
+
+If you use Windows and have Visual Studio installed, a `gtest.sln` file and
+several `.vcproj` files will be created. You can then build them using Visual
+Studio.
+
+On Mac OS X with Xcode installed, a `.xcodeproj` file will be generated.
+
+#### Incorporating Into An Existing CMake Project
+
+If you want to use GoogleTest in a project which already uses CMake, the easiest
+way is to get installed libraries and headers.
+
+* Import GoogleTest by using `find_package` (or `pkg_check_modules`). For
+ example, if `find_package(GTest CONFIG REQUIRED)` succeeds, you can use the
+ libraries as `GTest::gtest`, `GTest::gmock`.
+
+And a more robust and flexible approach is to build GoogleTest as part of that
+project directly. This is done by making the GoogleTest source code available to
+the main build and adding it using CMake's `add_subdirectory()` command. This
+has the significant advantage that the same compiler and linker settings are
+used between GoogleTest and the rest of your project, so issues associated with
+using incompatible libraries (eg debug/release), etc. are avoided. This is
+particularly useful on Windows. Making GoogleTest's source code available to the
+main build can be done a few different ways:
+
+* Download the GoogleTest source code manually and place it at a known
+ location. This is the least flexible approach and can make it more difficult
+ to use with continuous integration systems, etc.
+* Embed the GoogleTest source code as a direct copy in the main project's
+ source tree. This is often the simplest approach, but is also the hardest to
+ keep up to date. Some organizations may not permit this method.
+* Add GoogleTest as a git submodule or equivalent. This may not always be
+ possible or appropriate. Git submodules, for example, have their own set of
+ advantages and drawbacks.
+* Use CMake to download GoogleTest as part of the build's configure step. This
+ approach doesn't have the limitations of the other methods.
+
+The last of the above methods is implemented with a small piece of CMake code
+that downloads and pulls the GoogleTest code into the main build.
+
+Just add to your `CMakeLists.txt`:
+
+```cmake
+include(FetchContent)
+FetchContent_Declare(
+ googletest
+ # Specify the commit you depend on and update it regularly.
+ URL https://github.com/google/googletest/archive/e2239ee6043f73722e7aa812a459f54a28552929.zip
+)
+# For Windows: Prevent overriding the parent project's compiler/linker settings
+set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+FetchContent_MakeAvailable(googletest)
+
+# Now simply link against gtest or gtest_main as needed. Eg
+add_executable(example example.cpp)
+target_link_libraries(example gtest_main)
+add_test(NAME example_test COMMAND example)
+```
+
+Note that this approach requires CMake 3.14 or later due to its use of the
+`FetchContent_MakeAvailable()` command.
+
+##### Visual Studio Dynamic vs Static Runtimes
+
+By default, new Visual Studio projects link the C runtimes dynamically but
+GoogleTest links them statically. This will generate an error that looks
+something like the following: gtest.lib(gtest-all.obj) : error LNK2038: mismatch
+detected for 'RuntimeLibrary': value 'MTd_StaticDebug' doesn't match value
+'MDd_DynamicDebug' in main.obj
+
+GoogleTest already has a CMake option for this: `gtest_force_shared_crt`
+
+Enabling this option will make gtest link the runtimes dynamically too, and
+match the project in which it is included.
+
+#### C++ Standard Version
+
+An environment that supports C++11 is required in order to successfully build
+GoogleTest. One way to ensure this is to specify the standard in the top-level
+project, for example by using the `set(CMAKE_CXX_STANDARD 11)` command. If this
+is not feasible, for example in a C project using GoogleTest for validation,
+then it can be specified by adding it to the options for cmake via the
+`DCMAKE_CXX_FLAGS` option.
+
+### Tweaking GoogleTest
+
+GoogleTest can be used in diverse environments. The default configuration may
+not work (or may not work well) out of the box in some environments. However,
+you can easily tweak GoogleTest by defining control macros on the compiler
+command line. Generally, these macros are named like `GTEST_XYZ` and you define
+them to either 1 or 0 to enable or disable a certain feature.
+
+We list the most frequently used macros below. For a complete list, see file
+[include/gtest/internal/gtest-port.h](https://github.com/google/googletest/blob/master/googletest/include/gtest/internal/gtest-port.h).
+
+### Multi-threaded Tests
+
+GoogleTest is thread-safe where the pthread library is available. After
+`#include "gtest/gtest.h"`, you can check the
+`GTEST_IS_THREADSAFE` macro to see whether this is the case (yes if the macro is
+`#defined` to 1, no if it's undefined.).
+
+If GoogleTest doesn't correctly detect whether pthread is available in your
+environment, you can force it with
+
+ -DGTEST_HAS_PTHREAD=1
+
+or
+
+ -DGTEST_HAS_PTHREAD=0
+
+When GoogleTest uses pthread, you may need to add flags to your compiler and/or
+linker to select the pthread library, or you'll get link errors. If you use the
+CMake script, this is taken care of for you. If you use your own build script,
+you'll need to read your compiler and linker's manual to figure out what flags
+to add.
+
+### As a Shared Library (DLL)
+
+GoogleTest is compact, so most users can build and link it as a static library
+for the simplicity. You can choose to use GoogleTest as a shared library (known
+as a DLL on Windows) if you prefer.
+
+To compile *gtest* as a shared library, add
+
+ -DGTEST_CREATE_SHARED_LIBRARY=1
+
+to the compiler flags. You'll also need to tell the linker to produce a shared
+library instead - consult your linker's manual for how to do it.
+
+To compile your *tests* that use the gtest shared library, add
+
+ -DGTEST_LINKED_AS_SHARED_LIBRARY=1
+
+to the compiler flags.
+
+Note: while the above steps aren't technically necessary today when using some
+compilers (e.g. GCC), they may become necessary in the future, if we decide to
+improve the speed of loading the library (see
+<http://gcc.gnu.org/wiki/Visibility> for details). Therefore you are recommended
+to always add the above flags when using GoogleTest as a shared library.
+Otherwise a future release of GoogleTest may break your build script.
+
+### Avoiding Macro Name Clashes
+
+In C++, macros don't obey namespaces. Therefore two libraries that both define a
+macro of the same name will clash if you `#include` both definitions. In case a
+GoogleTest macro clashes with another library, you can force GoogleTest to
+rename its macro to avoid the conflict.
+
+Specifically, if both GoogleTest and some other code define macro FOO, you can
+add
+
+ -DGTEST_DONT_DEFINE_FOO=1
+
+to the compiler flags to tell GoogleTest to change the macro's name from `FOO`
+to `GTEST_FOO`. Currently `FOO` can be `ASSERT_EQ`, `ASSERT_FALSE`, `ASSERT_GE`,
+`ASSERT_GT`, `ASSERT_LE`, `ASSERT_LT`, `ASSERT_NE`, `ASSERT_TRUE`,
+`EXPECT_FALSE`, `EXPECT_TRUE`, `FAIL`, `SUCCEED`, `TEST`, or `TEST_F`. For
+example, with `-DGTEST_DONT_DEFINE_TEST=1`, you'll need to write
+
+ GTEST_TEST(SomeTest, DoesThis) { ... }
+
+instead of
+
+ TEST(SomeTest, DoesThis) { ... }
+
+in order to define a test.
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-assertion-result.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-assertion-result.h
new file mode 100644
index 0000000000..addbb59c64
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-assertion-result.h
@@ -0,0 +1,237 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This file implements the AssertionResult type.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_ASSERTION_RESULT_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_ASSERTION_RESULT_H_
+
+#include <memory>
+#include <ostream>
+#include <string>
+#include <type_traits>
+
+#include "gtest/gtest-message.h"
+#include "gtest/internal/gtest-port.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+namespace testing {
+
+// A class for indicating whether an assertion was successful. When
+// the assertion wasn't successful, the AssertionResult object
+// remembers a non-empty message that describes how it failed.
+//
+// To create an instance of this class, use one of the factory functions
+// (AssertionSuccess() and AssertionFailure()).
+//
+// This class is useful for two purposes:
+// 1. Defining predicate functions to be used with Boolean test assertions
+// EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts
+// 2. Defining predicate-format functions to be
+// used with predicate assertions (ASSERT_PRED_FORMAT*, etc).
+//
+// For example, if you define IsEven predicate:
+//
+// testing::AssertionResult IsEven(int n) {
+// if ((n % 2) == 0)
+// return testing::AssertionSuccess();
+// else
+// return testing::AssertionFailure() << n << " is odd";
+// }
+//
+// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5)))
+// will print the message
+//
+// Value of: IsEven(Fib(5))
+// Actual: false (5 is odd)
+// Expected: true
+//
+// instead of a more opaque
+//
+// Value of: IsEven(Fib(5))
+// Actual: false
+// Expected: true
+//
+// in case IsEven is a simple Boolean predicate.
+//
+// If you expect your predicate to be reused and want to support informative
+// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up
+// about half as often as positive ones in our tests), supply messages for
+// both success and failure cases:
+//
+// testing::AssertionResult IsEven(int n) {
+// if ((n % 2) == 0)
+// return testing::AssertionSuccess() << n << " is even";
+// else
+// return testing::AssertionFailure() << n << " is odd";
+// }
+//
+// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print
+//
+// Value of: IsEven(Fib(6))
+// Actual: true (8 is even)
+// Expected: false
+//
+// NB: Predicates that support negative Boolean assertions have reduced
+// performance in positive ones so be careful not to use them in tests
+// that have lots (tens of thousands) of positive Boolean assertions.
+//
+// To use this class with EXPECT_PRED_FORMAT assertions such as:
+//
+// // Verifies that Foo() returns an even number.
+// EXPECT_PRED_FORMAT1(IsEven, Foo());
+//
+// you need to define:
+//
+// testing::AssertionResult IsEven(const char* expr, int n) {
+// if ((n % 2) == 0)
+// return testing::AssertionSuccess();
+// else
+// return testing::AssertionFailure()
+// << "Expected: " << expr << " is even\n Actual: it's " << n;
+// }
+//
+// If Foo() returns 5, you will see the following message:
+//
+// Expected: Foo() is even
+// Actual: it's 5
+//
+class GTEST_API_ AssertionResult {
+ public:
+ // Copy constructor.
+ // Used in EXPECT_TRUE/FALSE(assertion_result).
+ AssertionResult(const AssertionResult& other);
+
+// C4800 is a level 3 warning in Visual Studio 2015 and earlier.
+// This warning is not emitted in Visual Studio 2017.
+// This warning is off by default starting in Visual Studio 2019 but can be
+// enabled with command-line options.
+#if defined(_MSC_VER) && (_MSC_VER < 1910 || _MSC_VER >= 1920)
+ GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 /* forcing value to bool */)
+#endif
+
+ // Used in the EXPECT_TRUE/FALSE(bool_expression).
+ //
+ // T must be contextually convertible to bool.
+ //
+ // The second parameter prevents this overload from being considered if
+ // the argument is implicitly convertible to AssertionResult. In that case
+ // we want AssertionResult's copy constructor to be used.
+ template <typename T>
+ explicit AssertionResult(
+ const T& success,
+ typename std::enable_if<
+ !std::is_convertible<T, AssertionResult>::value>::type*
+ /*enabler*/
+ = nullptr)
+ : success_(success) {}
+
+#if defined(_MSC_VER) && (_MSC_VER < 1910 || _MSC_VER >= 1920)
+ GTEST_DISABLE_MSC_WARNINGS_POP_()
+#endif
+
+ // Assignment operator.
+ AssertionResult& operator=(AssertionResult other) {
+ swap(other);
+ return *this;
+ }
+
+ // Returns true if and only if the assertion succeeded.
+ operator bool() const { return success_; } // NOLINT
+
+ // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
+ AssertionResult operator!() const;
+
+ // Returns the text streamed into this AssertionResult. Test assertions
+ // use it when they fail (i.e., the predicate's outcome doesn't match the
+ // assertion's expectation). When nothing has been streamed into the
+ // object, returns an empty string.
+ const char* message() const {
+ return message_.get() != nullptr ? message_->c_str() : "";
+ }
+ // Deprecated; please use message() instead.
+ const char* failure_message() const { return message(); }
+
+ // Streams a custom failure message into this object.
+ template <typename T>
+ AssertionResult& operator<<(const T& value) {
+ AppendMessage(Message() << value);
+ return *this;
+ }
+
+ // Allows streaming basic output manipulators such as endl or flush into
+ // this object.
+ AssertionResult& operator<<(
+ ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) {
+ AppendMessage(Message() << basic_manipulator);
+ return *this;
+ }
+
+ private:
+ // Appends the contents of message to message_.
+ void AppendMessage(const Message& a_message) {
+ if (message_.get() == nullptr) message_.reset(new ::std::string);
+ message_->append(a_message.GetString().c_str());
+ }
+
+ // Swap the contents of this AssertionResult with other.
+ void swap(AssertionResult& other);
+
+ // Stores result of the assertion predicate.
+ bool success_;
+ // Stores the message describing the condition in case the expectation
+ // construct is not satisfied with the predicate's outcome.
+ // Referenced via a pointer to avoid taking too much stack frame space
+ // with test assertions.
+ std::unique_ptr< ::std::string> message_;
+};
+
+// Makes a successful assertion result.
+GTEST_API_ AssertionResult AssertionSuccess();
+
+// Makes a failed assertion result.
+GTEST_API_ AssertionResult AssertionFailure();
+
+// Makes a failed assertion result with the given failure message.
+// Deprecated; use AssertionFailure() << msg.
+GTEST_API_ AssertionResult AssertionFailure(const Message& msg);
+
+} // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_ASSERTION_RESULT_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-death-test.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-death-test.h
new file mode 100644
index 0000000000..84e5a5bbd3
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-death-test.h
@@ -0,0 +1,345 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines the public API for death tests. It is
+// #included by gtest.h so a user doesn't need to include this
+// directly.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+
+#include "gtest/internal/gtest-death-test-internal.h"
+
+// This flag controls the style of death tests. Valid values are "threadsafe",
+// meaning that the death test child process will re-execute the test binary
+// from the start, running only a single death test, or "fast",
+// meaning that the child process will execute the test logic immediately
+// after forking.
+GTEST_DECLARE_string_(death_test_style);
+
+namespace testing {
+
+#if GTEST_HAS_DEATH_TEST
+
+namespace internal {
+
+// Returns a Boolean value indicating whether the caller is currently
+// executing in the context of the death test child process. Tools such as
+// Valgrind heap checkers may need this to modify their behavior in death
+// tests. IMPORTANT: This is an internal utility. Using it may break the
+// implementation of death tests. User code MUST NOT use it.
+GTEST_API_ bool InDeathTestChild();
+
+} // namespace internal
+
+// The following macros are useful for writing death tests.
+
+// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
+// executed:
+//
+// 1. It generates a warning if there is more than one active
+// thread. This is because it's safe to fork() or clone() only
+// when there is a single thread.
+//
+// 2. The parent process clone()s a sub-process and runs the death
+// test in it; the sub-process exits with code 0 at the end of the
+// death test, if it hasn't exited already.
+//
+// 3. The parent process waits for the sub-process to terminate.
+//
+// 4. The parent process checks the exit code and error message of
+// the sub-process.
+//
+// Examples:
+//
+// ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
+// for (int i = 0; i < 5; i++) {
+// EXPECT_DEATH(server.ProcessRequest(i),
+// "Invalid request .* in ProcessRequest()")
+// << "Failed to die on request " << i;
+// }
+//
+// ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
+//
+// bool KilledBySIGHUP(int exit_code) {
+// return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
+// }
+//
+// ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
+//
+// The final parameter to each of these macros is a matcher applied to any data
+// the sub-process wrote to stderr. For compatibility with existing tests, a
+// bare string is interpreted as a regular expression matcher.
+//
+// On the regular expressions used in death tests:
+//
+// On POSIX-compliant systems (*nix), we use the <regex.h> library,
+// which uses the POSIX extended regex syntax.
+//
+// On other platforms (e.g. Windows or Mac), we only support a simple regex
+// syntax implemented as part of Google Test. This limited
+// implementation should be enough most of the time when writing
+// death tests; though it lacks many features you can find in PCRE
+// or POSIX extended regex syntax. For example, we don't support
+// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
+// repetition count ("x{5,7}"), among others.
+//
+// Below is the syntax that we do support. We chose it to be a
+// subset of both PCRE and POSIX extended regex, so it's easy to
+// learn wherever you come from. In the following: 'A' denotes a
+// literal character, period (.), or a single \\ escape sequence;
+// 'x' and 'y' denote regular expressions; 'm' and 'n' are for
+// natural numbers.
+//
+// c matches any literal character c
+// \\d matches any decimal digit
+// \\D matches any character that's not a decimal digit
+// \\f matches \f
+// \\n matches \n
+// \\r matches \r
+// \\s matches any ASCII whitespace, including \n
+// \\S matches any character that's not a whitespace
+// \\t matches \t
+// \\v matches \v
+// \\w matches any letter, _, or decimal digit
+// \\W matches any character that \\w doesn't match
+// \\c matches any literal character c, which must be a punctuation
+// . matches any single character except \n
+// A? matches 0 or 1 occurrences of A
+// A* matches 0 or many occurrences of A
+// A+ matches 1 or many occurrences of A
+// ^ matches the beginning of a string (not that of each line)
+// $ matches the end of a string (not that of each line)
+// xy matches x followed by y
+//
+// If you accidentally use PCRE or POSIX extended regex features
+// not implemented by us, you will get a run-time failure. In that
+// case, please try to rewrite your regular expression within the
+// above syntax.
+//
+// This implementation is *not* meant to be as highly tuned or robust
+// as a compiled regex library, but should perform well enough for a
+// death test, which already incurs significant overhead by launching
+// a child process.
+//
+// Known caveats:
+//
+// A "threadsafe" style death test obtains the path to the test
+// program from argv[0] and re-executes it in the sub-process. For
+// simplicity, the current implementation doesn't search the PATH
+// when launching the sub-process. This means that the user must
+// invoke the test program via a path that contains at least one
+// path separator (e.g. path/to/foo_test and
+// /absolute/path/to/bar_test are fine, but foo_test is not). This
+// is rarely a problem as people usually don't put the test binary
+// directory in PATH.
+//
+
+// Asserts that a given `statement` causes the program to exit, with an
+// integer exit status that satisfies `predicate`, and emitting error output
+// that matches `matcher`.
+#define ASSERT_EXIT(statement, predicate, matcher) \
+ GTEST_DEATH_TEST_(statement, predicate, matcher, GTEST_FATAL_FAILURE_)
+
+// Like `ASSERT_EXIT`, but continues on to successive tests in the
+// test suite, if any:
+#define EXPECT_EXIT(statement, predicate, matcher) \
+ GTEST_DEATH_TEST_(statement, predicate, matcher, GTEST_NONFATAL_FAILURE_)
+
+// Asserts that a given `statement` causes the program to exit, either by
+// explicitly exiting with a nonzero exit code or being killed by a
+// signal, and emitting error output that matches `matcher`.
+#define ASSERT_DEATH(statement, matcher) \
+ ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, matcher)
+
+// Like `ASSERT_DEATH`, but continues on to successive tests in the
+// test suite, if any:
+#define EXPECT_DEATH(statement, matcher) \
+ EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, matcher)
+
+// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:
+
+// Tests that an exit code describes a normal exit with a given exit code.
+class GTEST_API_ ExitedWithCode {
+ public:
+ explicit ExitedWithCode(int exit_code);
+ ExitedWithCode(const ExitedWithCode&) = default;
+ void operator=(const ExitedWithCode& other) = delete;
+ bool operator()(int exit_status) const;
+
+ private:
+ const int exit_code_;
+};
+
+#if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
+// Tests that an exit code describes an exit due to termination by a
+// given signal.
+class GTEST_API_ KilledBySignal {
+ public:
+ explicit KilledBySignal(int signum);
+ bool operator()(int exit_status) const;
+
+ private:
+ const int signum_;
+};
+#endif // !GTEST_OS_WINDOWS
+
+// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
+// The death testing framework causes this to have interesting semantics,
+// since the sideeffects of the call are only visible in opt mode, and not
+// in debug mode.
+//
+// In practice, this can be used to test functions that utilize the
+// LOG(DFATAL) macro using the following style:
+//
+// int DieInDebugOr12(int* sideeffect) {
+// if (sideeffect) {
+// *sideeffect = 12;
+// }
+// LOG(DFATAL) << "death";
+// return 12;
+// }
+//
+// TEST(TestSuite, TestDieOr12WorksInDgbAndOpt) {
+// int sideeffect = 0;
+// // Only asserts in dbg.
+// EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
+//
+// #ifdef NDEBUG
+// // opt-mode has sideeffect visible.
+// EXPECT_EQ(12, sideeffect);
+// #else
+// // dbg-mode no visible sideeffect.
+// EXPECT_EQ(0, sideeffect);
+// #endif
+// }
+//
+// This will assert that DieInDebugReturn12InOpt() crashes in debug
+// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
+// appropriate fallback value (12 in this case) in opt mode. If you
+// need to test that a function has appropriate side-effects in opt
+// mode, include assertions against the side-effects. A general
+// pattern for this is:
+//
+// EXPECT_DEBUG_DEATH({
+// // Side-effects here will have an effect after this statement in
+// // opt mode, but none in debug mode.
+// EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
+// }, "death");
+//
+#ifdef NDEBUG
+
+#define EXPECT_DEBUG_DEATH(statement, regex) \
+ GTEST_EXECUTE_STATEMENT_(statement, regex)
+
+#define ASSERT_DEBUG_DEATH(statement, regex) \
+ GTEST_EXECUTE_STATEMENT_(statement, regex)
+
+#else
+
+#define EXPECT_DEBUG_DEATH(statement, regex) EXPECT_DEATH(statement, regex)
+
+#define ASSERT_DEBUG_DEATH(statement, regex) ASSERT_DEATH(statement, regex)
+
+#endif // NDEBUG for EXPECT_DEBUG_DEATH
+#endif // GTEST_HAS_DEATH_TEST
+
+// This macro is used for implementing macros such as
+// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
+// death tests are not supported. Those macros must compile on such systems
+// if and only if EXPECT_DEATH and ASSERT_DEATH compile with the same parameters
+// on systems that support death tests. This allows one to write such a macro on
+// a system that does not support death tests and be sure that it will compile
+// on a death-test supporting system. It is exposed publicly so that systems
+// that have death-tests with stricter requirements than GTEST_HAS_DEATH_TEST
+// can write their own equivalent of EXPECT_DEATH_IF_SUPPORTED and
+// ASSERT_DEATH_IF_SUPPORTED.
+//
+// Parameters:
+// statement - A statement that a macro such as EXPECT_DEATH would test
+// for program termination. This macro has to make sure this
+// statement is compiled but not executed, to ensure that
+// EXPECT_DEATH_IF_SUPPORTED compiles with a certain
+// parameter if and only if EXPECT_DEATH compiles with it.
+// regex - A regex that a macro such as EXPECT_DEATH would use to test
+// the output of statement. This parameter has to be
+// compiled but not evaluated by this macro, to ensure that
+// this macro only accepts expressions that a macro such as
+// EXPECT_DEATH would accept.
+// terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
+// and a return statement for ASSERT_DEATH_IF_SUPPORTED.
+// This ensures that ASSERT_DEATH_IF_SUPPORTED will not
+// compile inside functions where ASSERT_DEATH doesn't
+// compile.
+//
+// The branch that has an always false condition is used to ensure that
+// statement and regex are compiled (and thus syntactically correct) but
+// never executed. The unreachable code macro protects the terminator
+// statement from generating an 'unreachable code' warning in case
+// statement unconditionally returns or throws. The Message constructor at
+// the end allows the syntax of streaming additional messages into the
+// macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH.
+#define GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, terminator) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ GTEST_LOG_(WARNING) << "Death tests are not supported on this platform.\n" \
+ << "Statement '" #statement "' cannot be verified."; \
+ } else if (::testing::internal::AlwaysFalse()) { \
+ ::testing::internal::RE::PartialMatch(".*", (regex)); \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ terminator; \
+ } else \
+ ::testing::Message()
+
+// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
+// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
+// death tests are supported; otherwise they just issue a warning. This is
+// useful when you are combining death test assertions with normal test
+// assertions in one test.
+#if GTEST_HAS_DEATH_TEST
+#define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
+ EXPECT_DEATH(statement, regex)
+#define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
+ ASSERT_DEATH(statement, regex)
+#else
+#define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
+ GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, )
+#define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
+ GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, return)
+#endif
+
+} // namespace testing
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-matchers.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-matchers.h
new file mode 100644
index 0000000000..bffa00c533
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-matchers.h
@@ -0,0 +1,956 @@
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This file implements just enough of the matcher interface to allow
+// EXPECT_DEATH and friends to accept a matcher argument.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
+
+#include <atomic>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <type_traits>
+
+#include "gtest/gtest-printers.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
+
+// MSVC warning C5046 is new as of VS2017 version 15.8.
+#if defined(_MSC_VER) && _MSC_VER >= 1915
+#define GTEST_MAYBE_5046_ 5046
+#else
+#define GTEST_MAYBE_5046_
+#endif
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(
+ 4251 GTEST_MAYBE_5046_ /* class A needs to have dll-interface to be used by
+ clients of class B */
+ /* Symbol involving type with internal linkage not defined */)
+
+namespace testing {
+
+// To implement a matcher Foo for type T, define:
+// 1. a class FooMatcherMatcher that implements the matcher interface:
+// using is_gtest_matcher = void;
+// bool MatchAndExplain(const T&, std::ostream*);
+// (MatchResultListener* can also be used instead of std::ostream*)
+// void DescribeTo(std::ostream*);
+// void DescribeNegationTo(std::ostream*);
+//
+// 2. a factory function that creates a Matcher<T> object from a
+// FooMatcherMatcher.
+
+class MatchResultListener {
+ public:
+ // Creates a listener object with the given underlying ostream. The
+ // listener does not own the ostream, and does not dereference it
+ // in the constructor or destructor.
+ explicit MatchResultListener(::std::ostream* os) : stream_(os) {}
+ virtual ~MatchResultListener() = 0; // Makes this class abstract.
+
+ // Streams x to the underlying ostream; does nothing if the ostream
+ // is NULL.
+ template <typename T>
+ MatchResultListener& operator<<(const T& x) {
+ if (stream_ != nullptr) *stream_ << x;
+ return *this;
+ }
+
+ // Returns the underlying ostream.
+ ::std::ostream* stream() { return stream_; }
+
+ // Returns true if and only if the listener is interested in an explanation
+ // of the match result. A matcher's MatchAndExplain() method can use
+ // this information to avoid generating the explanation when no one
+ // intends to hear it.
+ bool IsInterested() const { return stream_ != nullptr; }
+
+ private:
+ ::std::ostream* const stream_;
+
+ MatchResultListener(const MatchResultListener&) = delete;
+ MatchResultListener& operator=(const MatchResultListener&) = delete;
+};
+
+inline MatchResultListener::~MatchResultListener() {}
+
+// An instance of a subclass of this knows how to describe itself as a
+// matcher.
+class GTEST_API_ MatcherDescriberInterface {
+ public:
+ virtual ~MatcherDescriberInterface() {}
+
+ // Describes this matcher to an ostream. The function should print
+ // a verb phrase that describes the property a value matching this
+ // matcher should have. The subject of the verb phrase is the value
+ // being matched. For example, the DescribeTo() method of the Gt(7)
+ // matcher prints "is greater than 7".
+ virtual void DescribeTo(::std::ostream* os) const = 0;
+
+ // Describes the negation of this matcher to an ostream. For
+ // example, if the description of this matcher is "is greater than
+ // 7", the negated description could be "is not greater than 7".
+ // You are not required to override this when implementing
+ // MatcherInterface, but it is highly advised so that your matcher
+ // can produce good error messages.
+ virtual void DescribeNegationTo(::std::ostream* os) const {
+ *os << "not (";
+ DescribeTo(os);
+ *os << ")";
+ }
+};
+
+// The implementation of a matcher.
+template <typename T>
+class MatcherInterface : public MatcherDescriberInterface {
+ public:
+ // Returns true if and only if the matcher matches x; also explains the
+ // match result to 'listener' if necessary (see the next paragraph), in
+ // the form of a non-restrictive relative clause ("which ...",
+ // "whose ...", etc) that describes x. For example, the
+ // MatchAndExplain() method of the Pointee(...) matcher should
+ // generate an explanation like "which points to ...".
+ //
+ // Implementations of MatchAndExplain() should add an explanation of
+ // the match result *if and only if* they can provide additional
+ // information that's not already present (or not obvious) in the
+ // print-out of x and the matcher's description. Whether the match
+ // succeeds is not a factor in deciding whether an explanation is
+ // needed, as sometimes the caller needs to print a failure message
+ // when the match succeeds (e.g. when the matcher is used inside
+ // Not()).
+ //
+ // For example, a "has at least 10 elements" matcher should explain
+ // what the actual element count is, regardless of the match result,
+ // as it is useful information to the reader; on the other hand, an
+ // "is empty" matcher probably only needs to explain what the actual
+ // size is when the match fails, as it's redundant to say that the
+ // size is 0 when the value is already known to be empty.
+ //
+ // You should override this method when defining a new matcher.
+ //
+ // It's the responsibility of the caller (Google Test) to guarantee
+ // that 'listener' is not NULL. This helps to simplify a matcher's
+ // implementation when it doesn't care about the performance, as it
+ // can talk to 'listener' without checking its validity first.
+ // However, in order to implement dummy listeners efficiently,
+ // listener->stream() may be NULL.
+ virtual bool MatchAndExplain(T x, MatchResultListener* listener) const = 0;
+
+ // Inherits these methods from MatcherDescriberInterface:
+ // virtual void DescribeTo(::std::ostream* os) const = 0;
+ // virtual void DescribeNegationTo(::std::ostream* os) const;
+};
+
+namespace internal {
+
+struct AnyEq {
+ template <typename A, typename B>
+ bool operator()(const A& a, const B& b) const {
+ return a == b;
+ }
+};
+struct AnyNe {
+ template <typename A, typename B>
+ bool operator()(const A& a, const B& b) const {
+ return a != b;
+ }
+};
+struct AnyLt {
+ template <typename A, typename B>
+ bool operator()(const A& a, const B& b) const {
+ return a < b;
+ }
+};
+struct AnyGt {
+ template <typename A, typename B>
+ bool operator()(const A& a, const B& b) const {
+ return a > b;
+ }
+};
+struct AnyLe {
+ template <typename A, typename B>
+ bool operator()(const A& a, const B& b) const {
+ return a <= b;
+ }
+};
+struct AnyGe {
+ template <typename A, typename B>
+ bool operator()(const A& a, const B& b) const {
+ return a >= b;
+ }
+};
+
+// A match result listener that ignores the explanation.
+class DummyMatchResultListener : public MatchResultListener {
+ public:
+ DummyMatchResultListener() : MatchResultListener(nullptr) {}
+
+ private:
+ DummyMatchResultListener(const DummyMatchResultListener&) = delete;
+ DummyMatchResultListener& operator=(const DummyMatchResultListener&) = delete;
+};
+
+// A match result listener that forwards the explanation to a given
+// ostream. The difference between this and MatchResultListener is
+// that the former is concrete.
+class StreamMatchResultListener : public MatchResultListener {
+ public:
+ explicit StreamMatchResultListener(::std::ostream* os)
+ : MatchResultListener(os) {}
+
+ private:
+ StreamMatchResultListener(const StreamMatchResultListener&) = delete;
+ StreamMatchResultListener& operator=(const StreamMatchResultListener&) =
+ delete;
+};
+
+struct SharedPayloadBase {
+ std::atomic<int> ref{1};
+ void Ref() { ref.fetch_add(1, std::memory_order_relaxed); }
+ bool Unref() { return ref.fetch_sub(1, std::memory_order_acq_rel) == 1; }
+};
+
+template <typename T>
+struct SharedPayload : SharedPayloadBase {
+ explicit SharedPayload(const T& v) : value(v) {}
+ explicit SharedPayload(T&& v) : value(std::move(v)) {}
+
+ static void Destroy(SharedPayloadBase* shared) {
+ delete static_cast<SharedPayload*>(shared);
+ }
+
+ T value;
+};
+
+// An internal class for implementing Matcher<T>, which will derive
+// from it. We put functionalities common to all Matcher<T>
+// specializations here to avoid code duplication.
+template <typename T>
+class MatcherBase : private MatcherDescriberInterface {
+ public:
+ // Returns true if and only if the matcher matches x; also explains the
+ // match result to 'listener'.
+ bool MatchAndExplain(const T& x, MatchResultListener* listener) const {
+ GTEST_CHECK_(vtable_ != nullptr);
+ return vtable_->match_and_explain(*this, x, listener);
+ }
+
+ // Returns true if and only if this matcher matches x.
+ bool Matches(const T& x) const {
+ DummyMatchResultListener dummy;
+ return MatchAndExplain(x, &dummy);
+ }
+
+ // Describes this matcher to an ostream.
+ void DescribeTo(::std::ostream* os) const final {
+ GTEST_CHECK_(vtable_ != nullptr);
+ vtable_->describe(*this, os, false);
+ }
+
+ // Describes the negation of this matcher to an ostream.
+ void DescribeNegationTo(::std::ostream* os) const final {
+ GTEST_CHECK_(vtable_ != nullptr);
+ vtable_->describe(*this, os, true);
+ }
+
+ // Explains why x matches, or doesn't match, the matcher.
+ void ExplainMatchResultTo(const T& x, ::std::ostream* os) const {
+ StreamMatchResultListener listener(os);
+ MatchAndExplain(x, &listener);
+ }
+
+ // Returns the describer for this matcher object; retains ownership
+ // of the describer, which is only guaranteed to be alive when
+ // this matcher object is alive.
+ const MatcherDescriberInterface* GetDescriber() const {
+ if (vtable_ == nullptr) return nullptr;
+ return vtable_->get_describer(*this);
+ }
+
+ protected:
+ MatcherBase() : vtable_(nullptr), buffer_() {}
+
+ // Constructs a matcher from its implementation.
+ template <typename U>
+ explicit MatcherBase(const MatcherInterface<U>* impl)
+ : vtable_(nullptr), buffer_() {
+ Init(impl);
+ }
+
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ MatcherBase(M&& m) : vtable_(nullptr), buffer_() { // NOLINT
+ Init(std::forward<M>(m));
+ }
+
+ MatcherBase(const MatcherBase& other)
+ : vtable_(other.vtable_), buffer_(other.buffer_) {
+ if (IsShared()) buffer_.shared->Ref();
+ }
+
+ MatcherBase& operator=(const MatcherBase& other) {
+ if (this == &other) return *this;
+ Destroy();
+ vtable_ = other.vtable_;
+ buffer_ = other.buffer_;
+ if (IsShared()) buffer_.shared->Ref();
+ return *this;
+ }
+
+ MatcherBase(MatcherBase&& other)
+ : vtable_(other.vtable_), buffer_(other.buffer_) {
+ other.vtable_ = nullptr;
+ }
+
+ MatcherBase& operator=(MatcherBase&& other) {
+ if (this == &other) return *this;
+ Destroy();
+ vtable_ = other.vtable_;
+ buffer_ = other.buffer_;
+ other.vtable_ = nullptr;
+ return *this;
+ }
+
+ ~MatcherBase() override { Destroy(); }
+
+ private:
+ struct VTable {
+ bool (*match_and_explain)(const MatcherBase&, const T&,
+ MatchResultListener*);
+ void (*describe)(const MatcherBase&, std::ostream*, bool negation);
+ // Returns the captured object if it implements the interface, otherwise
+ // returns the MatcherBase itself.
+ const MatcherDescriberInterface* (*get_describer)(const MatcherBase&);
+ // Called on shared instances when the reference count reaches 0.
+ void (*shared_destroy)(SharedPayloadBase*);
+ };
+
+ bool IsShared() const {
+ return vtable_ != nullptr && vtable_->shared_destroy != nullptr;
+ }
+
+ // If the implementation uses a listener, call that.
+ template <typename P>
+ static auto MatchAndExplainImpl(const MatcherBase& m, const T& value,
+ MatchResultListener* listener)
+ -> decltype(P::Get(m).MatchAndExplain(value, listener->stream())) {
+ return P::Get(m).MatchAndExplain(value, listener->stream());
+ }
+
+ template <typename P>
+ static auto MatchAndExplainImpl(const MatcherBase& m, const T& value,
+ MatchResultListener* listener)
+ -> decltype(P::Get(m).MatchAndExplain(value, listener)) {
+ return P::Get(m).MatchAndExplain(value, listener);
+ }
+
+ template <typename P>
+ static void DescribeImpl(const MatcherBase& m, std::ostream* os,
+ bool negation) {
+ if (negation) {
+ P::Get(m).DescribeNegationTo(os);
+ } else {
+ P::Get(m).DescribeTo(os);
+ }
+ }
+
+ template <typename P>
+ static const MatcherDescriberInterface* GetDescriberImpl(
+ const MatcherBase& m) {
+ // If the impl is a MatcherDescriberInterface, then return it.
+ // Otherwise use MatcherBase itself.
+ // This allows us to implement the GetDescriber() function without support
+ // from the impl, but some users really want to get their impl back when
+ // they call GetDescriber().
+ // We use std::get on a tuple as a workaround of not having `if constexpr`.
+ return std::get<(
+ std::is_convertible<decltype(&P::Get(m)),
+ const MatcherDescriberInterface*>::value
+ ? 1
+ : 0)>(std::make_tuple(&m, &P::Get(m)));
+ }
+
+ template <typename P>
+ const VTable* GetVTable() {
+ static constexpr VTable kVTable = {&MatchAndExplainImpl<P>,
+ &DescribeImpl<P>, &GetDescriberImpl<P>,
+ P::shared_destroy};
+ return &kVTable;
+ }
+
+ union Buffer {
+ // Add some types to give Buffer some common alignment/size use cases.
+ void* ptr;
+ double d;
+ int64_t i;
+ // And add one for the out-of-line cases.
+ SharedPayloadBase* shared;
+ };
+
+ void Destroy() {
+ if (IsShared() && buffer_.shared->Unref()) {
+ vtable_->shared_destroy(buffer_.shared);
+ }
+ }
+
+ template <typename M>
+ static constexpr bool IsInlined() {
+ return sizeof(M) <= sizeof(Buffer) && alignof(M) <= alignof(Buffer) &&
+ std::is_trivially_copy_constructible<M>::value &&
+ std::is_trivially_destructible<M>::value;
+ }
+
+ template <typename M, bool = MatcherBase::IsInlined<M>()>
+ struct ValuePolicy {
+ static const M& Get(const MatcherBase& m) {
+ // When inlined along with Init, need to be explicit to avoid violating
+ // strict aliasing rules.
+ const M* ptr =
+ static_cast<const M*>(static_cast<const void*>(&m.buffer_));
+ return *ptr;
+ }
+ static void Init(MatcherBase& m, M impl) {
+ ::new (static_cast<void*>(&m.buffer_)) M(impl);
+ }
+ static constexpr auto shared_destroy = nullptr;
+ };
+
+ template <typename M>
+ struct ValuePolicy<M, false> {
+ using Shared = SharedPayload<M>;
+ static const M& Get(const MatcherBase& m) {
+ return static_cast<Shared*>(m.buffer_.shared)->value;
+ }
+ template <typename Arg>
+ static void Init(MatcherBase& m, Arg&& arg) {
+ m.buffer_.shared = new Shared(std::forward<Arg>(arg));
+ }
+ static constexpr auto shared_destroy = &Shared::Destroy;
+ };
+
+ template <typename U, bool B>
+ struct ValuePolicy<const MatcherInterface<U>*, B> {
+ using M = const MatcherInterface<U>;
+ using Shared = SharedPayload<std::unique_ptr<M>>;
+ static const M& Get(const MatcherBase& m) {
+ return *static_cast<Shared*>(m.buffer_.shared)->value;
+ }
+ static void Init(MatcherBase& m, M* impl) {
+ m.buffer_.shared = new Shared(std::unique_ptr<M>(impl));
+ }
+
+ static constexpr auto shared_destroy = &Shared::Destroy;
+ };
+
+ template <typename M>
+ void Init(M&& m) {
+ using MM = typename std::decay<M>::type;
+ using Policy = ValuePolicy<MM>;
+ vtable_ = GetVTable<Policy>();
+ Policy::Init(*this, std::forward<M>(m));
+ }
+
+ const VTable* vtable_;
+ Buffer buffer_;
+};
+
+} // namespace internal
+
+// A Matcher<T> is a copyable and IMMUTABLE (except by assignment)
+// object that can check whether a value of type T matches. The
+// implementation of Matcher<T> is just a std::shared_ptr to const
+// MatcherInterface<T>. Don't inherit from Matcher!
+template <typename T>
+class Matcher : public internal::MatcherBase<T> {
+ public:
+ // Constructs a null matcher. Needed for storing Matcher objects in STL
+ // containers. A default-constructed matcher is not yet initialized. You
+ // cannot use it until a valid value has been assigned to it.
+ explicit Matcher() {} // NOLINT
+
+ // Constructs a matcher from its implementation.
+ explicit Matcher(const MatcherInterface<const T&>* impl)
+ : internal::MatcherBase<T>(impl) {}
+
+ template <typename U>
+ explicit Matcher(
+ const MatcherInterface<U>* impl,
+ typename std::enable_if<!std::is_same<U, const U&>::value>::type* =
+ nullptr)
+ : internal::MatcherBase<T>(impl) {}
+
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ Matcher(M&& m) : internal::MatcherBase<T>(std::forward<M>(m)) {} // NOLINT
+
+ // Implicit constructor here allows people to write
+ // EXPECT_CALL(foo, Bar(5)) instead of EXPECT_CALL(foo, Bar(Eq(5))) sometimes
+ Matcher(T value); // NOLINT
+};
+
+// The following two specializations allow the user to write str
+// instead of Eq(str) and "foo" instead of Eq("foo") when a std::string
+// matcher is expected.
+template <>
+class GTEST_API_ Matcher<const std::string&>
+ : public internal::MatcherBase<const std::string&> {
+ public:
+ Matcher() {}
+
+ explicit Matcher(const MatcherInterface<const std::string&>* impl)
+ : internal::MatcherBase<const std::string&>(impl) {}
+
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ Matcher(M&& m) // NOLINT
+ : internal::MatcherBase<const std::string&>(std::forward<M>(m)) {}
+
+ // Allows the user to write str instead of Eq(str) sometimes, where
+ // str is a std::string object.
+ Matcher(const std::string& s); // NOLINT
+
+ // Allows the user to write "foo" instead of Eq("foo") sometimes.
+ Matcher(const char* s); // NOLINT
+};
+
+template <>
+class GTEST_API_ Matcher<std::string>
+ : public internal::MatcherBase<std::string> {
+ public:
+ Matcher() {}
+
+ explicit Matcher(const MatcherInterface<const std::string&>* impl)
+ : internal::MatcherBase<std::string>(impl) {}
+ explicit Matcher(const MatcherInterface<std::string>* impl)
+ : internal::MatcherBase<std::string>(impl) {}
+
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ Matcher(M&& m) // NOLINT
+ : internal::MatcherBase<std::string>(std::forward<M>(m)) {}
+
+ // Allows the user to write str instead of Eq(str) sometimes, where
+ // str is a string object.
+ Matcher(const std::string& s); // NOLINT
+
+ // Allows the user to write "foo" instead of Eq("foo") sometimes.
+ Matcher(const char* s); // NOLINT
+};
+
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+// The following two specializations allow the user to write str
+// instead of Eq(str) and "foo" instead of Eq("foo") when a absl::string_view
+// matcher is expected.
+template <>
+class GTEST_API_ Matcher<const internal::StringView&>
+ : public internal::MatcherBase<const internal::StringView&> {
+ public:
+ Matcher() {}
+
+ explicit Matcher(const MatcherInterface<const internal::StringView&>* impl)
+ : internal::MatcherBase<const internal::StringView&>(impl) {}
+
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ Matcher(M&& m) // NOLINT
+ : internal::MatcherBase<const internal::StringView&>(std::forward<M>(m)) {
+ }
+
+ // Allows the user to write str instead of Eq(str) sometimes, where
+ // str is a std::string object.
+ Matcher(const std::string& s); // NOLINT
+
+ // Allows the user to write "foo" instead of Eq("foo") sometimes.
+ Matcher(const char* s); // NOLINT
+
+ // Allows the user to pass absl::string_views or std::string_views directly.
+ Matcher(internal::StringView s); // NOLINT
+};
+
+template <>
+class GTEST_API_ Matcher<internal::StringView>
+ : public internal::MatcherBase<internal::StringView> {
+ public:
+ Matcher() {}
+
+ explicit Matcher(const MatcherInterface<const internal::StringView&>* impl)
+ : internal::MatcherBase<internal::StringView>(impl) {}
+ explicit Matcher(const MatcherInterface<internal::StringView>* impl)
+ : internal::MatcherBase<internal::StringView>(impl) {}
+
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ Matcher(M&& m) // NOLINT
+ : internal::MatcherBase<internal::StringView>(std::forward<M>(m)) {}
+
+ // Allows the user to write str instead of Eq(str) sometimes, where
+ // str is a std::string object.
+ Matcher(const std::string& s); // NOLINT
+
+ // Allows the user to write "foo" instead of Eq("foo") sometimes.
+ Matcher(const char* s); // NOLINT
+
+ // Allows the user to pass absl::string_views or std::string_views directly.
+ Matcher(internal::StringView s); // NOLINT
+};
+#endif // GTEST_INTERNAL_HAS_STRING_VIEW
+
+// Prints a matcher in a human-readable format.
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const Matcher<T>& matcher) {
+ matcher.DescribeTo(&os);
+ return os;
+}
+
+// The PolymorphicMatcher class template makes it easy to implement a
+// polymorphic matcher (i.e. a matcher that can match values of more
+// than one type, e.g. Eq(n) and NotNull()).
+//
+// To define a polymorphic matcher, a user should provide an Impl
+// class that has a DescribeTo() method and a DescribeNegationTo()
+// method, and define a member function (or member function template)
+//
+// bool MatchAndExplain(const Value& value,
+// MatchResultListener* listener) const;
+//
+// See the definition of NotNull() for a complete example.
+template <class Impl>
+class PolymorphicMatcher {
+ public:
+ explicit PolymorphicMatcher(const Impl& an_impl) : impl_(an_impl) {}
+
+ // Returns a mutable reference to the underlying matcher
+ // implementation object.
+ Impl& mutable_impl() { return impl_; }
+
+ // Returns an immutable reference to the underlying matcher
+ // implementation object.
+ const Impl& impl() const { return impl_; }
+
+ template <typename T>
+ operator Matcher<T>() const {
+ return Matcher<T>(new MonomorphicImpl<const T&>(impl_));
+ }
+
+ private:
+ template <typename T>
+ class MonomorphicImpl : public MatcherInterface<T> {
+ public:
+ explicit MonomorphicImpl(const Impl& impl) : impl_(impl) {}
+
+ void DescribeTo(::std::ostream* os) const override { impl_.DescribeTo(os); }
+
+ void DescribeNegationTo(::std::ostream* os) const override {
+ impl_.DescribeNegationTo(os);
+ }
+
+ bool MatchAndExplain(T x, MatchResultListener* listener) const override {
+ return impl_.MatchAndExplain(x, listener);
+ }
+
+ private:
+ const Impl impl_;
+ };
+
+ Impl impl_;
+};
+
+// Creates a matcher from its implementation.
+// DEPRECATED: Especially in the generic code, prefer:
+// Matcher<T>(new MyMatcherImpl<const T&>(...));
+//
+// MakeMatcher may create a Matcher that accepts its argument by value, which
+// leads to unnecessary copies & lack of support for non-copyable types.
+template <typename T>
+inline Matcher<T> MakeMatcher(const MatcherInterface<T>* impl) {
+ return Matcher<T>(impl);
+}
+
+// Creates a polymorphic matcher from its implementation. This is
+// easier to use than the PolymorphicMatcher<Impl> constructor as it
+// doesn't require you to explicitly write the template argument, e.g.
+//
+// MakePolymorphicMatcher(foo);
+// vs
+// PolymorphicMatcher<TypeOfFoo>(foo);
+template <class Impl>
+inline PolymorphicMatcher<Impl> MakePolymorphicMatcher(const Impl& impl) {
+ return PolymorphicMatcher<Impl>(impl);
+}
+
+namespace internal {
+// Implements a matcher that compares a given value with a
+// pre-supplied value using one of the ==, <=, <, etc, operators. The
+// two values being compared don't have to have the same type.
+//
+// The matcher defined here is polymorphic (for example, Eq(5) can be
+// used to match an int, a short, a double, etc). Therefore we use
+// a template type conversion operator in the implementation.
+//
+// The following template definition assumes that the Rhs parameter is
+// a "bare" type (i.e. neither 'const T' nor 'T&').
+template <typename D, typename Rhs, typename Op>
+class ComparisonBase {
+ public:
+ explicit ComparisonBase(const Rhs& rhs) : rhs_(rhs) {}
+
+ using is_gtest_matcher = void;
+
+ template <typename Lhs>
+ bool MatchAndExplain(const Lhs& lhs, std::ostream*) const {
+ return Op()(lhs, Unwrap(rhs_));
+ }
+ void DescribeTo(std::ostream* os) const {
+ *os << D::Desc() << " ";
+ UniversalPrint(Unwrap(rhs_), os);
+ }
+ void DescribeNegationTo(std::ostream* os) const {
+ *os << D::NegatedDesc() << " ";
+ UniversalPrint(Unwrap(rhs_), os);
+ }
+
+ private:
+ template <typename T>
+ static const T& Unwrap(const T& v) {
+ return v;
+ }
+ template <typename T>
+ static const T& Unwrap(std::reference_wrapper<T> v) {
+ return v;
+ }
+
+ Rhs rhs_;
+};
+
+template <typename Rhs>
+class EqMatcher : public ComparisonBase<EqMatcher<Rhs>, Rhs, AnyEq> {
+ public:
+ explicit EqMatcher(const Rhs& rhs)
+ : ComparisonBase<EqMatcher<Rhs>, Rhs, AnyEq>(rhs) {}
+ static const char* Desc() { return "is equal to"; }
+ static const char* NegatedDesc() { return "isn't equal to"; }
+};
+template <typename Rhs>
+class NeMatcher : public ComparisonBase<NeMatcher<Rhs>, Rhs, AnyNe> {
+ public:
+ explicit NeMatcher(const Rhs& rhs)
+ : ComparisonBase<NeMatcher<Rhs>, Rhs, AnyNe>(rhs) {}
+ static const char* Desc() { return "isn't equal to"; }
+ static const char* NegatedDesc() { return "is equal to"; }
+};
+template <typename Rhs>
+class LtMatcher : public ComparisonBase<LtMatcher<Rhs>, Rhs, AnyLt> {
+ public:
+ explicit LtMatcher(const Rhs& rhs)
+ : ComparisonBase<LtMatcher<Rhs>, Rhs, AnyLt>(rhs) {}
+ static const char* Desc() { return "is <"; }
+ static const char* NegatedDesc() { return "isn't <"; }
+};
+template <typename Rhs>
+class GtMatcher : public ComparisonBase<GtMatcher<Rhs>, Rhs, AnyGt> {
+ public:
+ explicit GtMatcher(const Rhs& rhs)
+ : ComparisonBase<GtMatcher<Rhs>, Rhs, AnyGt>(rhs) {}
+ static const char* Desc() { return "is >"; }
+ static const char* NegatedDesc() { return "isn't >"; }
+};
+template <typename Rhs>
+class LeMatcher : public ComparisonBase<LeMatcher<Rhs>, Rhs, AnyLe> {
+ public:
+ explicit LeMatcher(const Rhs& rhs)
+ : ComparisonBase<LeMatcher<Rhs>, Rhs, AnyLe>(rhs) {}
+ static const char* Desc() { return "is <="; }
+ static const char* NegatedDesc() { return "isn't <="; }
+};
+template <typename Rhs>
+class GeMatcher : public ComparisonBase<GeMatcher<Rhs>, Rhs, AnyGe> {
+ public:
+ explicit GeMatcher(const Rhs& rhs)
+ : ComparisonBase<GeMatcher<Rhs>, Rhs, AnyGe>(rhs) {}
+ static const char* Desc() { return "is >="; }
+ static const char* NegatedDesc() { return "isn't >="; }
+};
+
+template <typename T, typename = typename std::enable_if<
+ std::is_constructible<std::string, T>::value>::type>
+using StringLike = T;
+
+// Implements polymorphic matchers MatchesRegex(regex) and
+// ContainsRegex(regex), which can be used as a Matcher<T> as long as
+// T can be converted to a string.
+class MatchesRegexMatcher {
+ public:
+ MatchesRegexMatcher(const RE* regex, bool full_match)
+ : regex_(regex), full_match_(full_match) {}
+
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+ bool MatchAndExplain(const internal::StringView& s,
+ MatchResultListener* listener) const {
+ return MatchAndExplain(std::string(s), listener);
+ }
+#endif // GTEST_INTERNAL_HAS_STRING_VIEW
+
+ // Accepts pointer types, particularly:
+ // const char*
+ // char*
+ // const wchar_t*
+ // wchar_t*
+ template <typename CharType>
+ bool MatchAndExplain(CharType* s, MatchResultListener* listener) const {
+ return s != nullptr && MatchAndExplain(std::string(s), listener);
+ }
+
+ // Matches anything that can convert to std::string.
+ //
+ // This is a template, not just a plain function with const std::string&,
+ // because absl::string_view has some interfering non-explicit constructors.
+ template <class MatcheeStringType>
+ bool MatchAndExplain(const MatcheeStringType& s,
+ MatchResultListener* /* listener */) const {
+ const std::string& s2(s);
+ return full_match_ ? RE::FullMatch(s2, *regex_)
+ : RE::PartialMatch(s2, *regex_);
+ }
+
+ void DescribeTo(::std::ostream* os) const {
+ *os << (full_match_ ? "matches" : "contains") << " regular expression ";
+ UniversalPrinter<std::string>::Print(regex_->pattern(), os);
+ }
+
+ void DescribeNegationTo(::std::ostream* os) const {
+ *os << "doesn't " << (full_match_ ? "match" : "contain")
+ << " regular expression ";
+ UniversalPrinter<std::string>::Print(regex_->pattern(), os);
+ }
+
+ private:
+ const std::shared_ptr<const RE> regex_;
+ const bool full_match_;
+};
+} // namespace internal
+
+// Matches a string that fully matches regular expression 'regex'.
+// The matcher takes ownership of 'regex'.
+inline PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
+ const internal::RE* regex) {
+ return MakePolymorphicMatcher(internal::MatchesRegexMatcher(regex, true));
+}
+template <typename T = std::string>
+PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
+ const internal::StringLike<T>& regex) {
+ return MatchesRegex(new internal::RE(std::string(regex)));
+}
+
+// Matches a string that contains regular expression 'regex'.
+// The matcher takes ownership of 'regex'.
+inline PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
+ const internal::RE* regex) {
+ return MakePolymorphicMatcher(internal::MatchesRegexMatcher(regex, false));
+}
+template <typename T = std::string>
+PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
+ const internal::StringLike<T>& regex) {
+ return ContainsRegex(new internal::RE(std::string(regex)));
+}
+
+// Creates a polymorphic matcher that matches anything equal to x.
+// Note: if the parameter of Eq() were declared as const T&, Eq("foo")
+// wouldn't compile.
+template <typename T>
+inline internal::EqMatcher<T> Eq(T x) {
+ return internal::EqMatcher<T>(x);
+}
+
+// Constructs a Matcher<T> from a 'value' of type T. The constructed
+// matcher matches any value that's equal to 'value'.
+template <typename T>
+Matcher<T>::Matcher(T value) {
+ *this = Eq(value);
+}
+
+// Creates a monomorphic matcher that matches anything with type Lhs
+// and equal to rhs. A user may need to use this instead of Eq(...)
+// in order to resolve an overloading ambiguity.
+//
+// TypedEq<T>(x) is just a convenient short-hand for Matcher<T>(Eq(x))
+// or Matcher<T>(x), but more readable than the latter.
+//
+// We could define similar monomorphic matchers for other comparison
+// operations (e.g. TypedLt, TypedGe, and etc), but decided not to do
+// it yet as those are used much less than Eq() in practice. A user
+// can always write Matcher<T>(Lt(5)) to be explicit about the type,
+// for example.
+template <typename Lhs, typename Rhs>
+inline Matcher<Lhs> TypedEq(const Rhs& rhs) {
+ return Eq(rhs);
+}
+
+// Creates a polymorphic matcher that matches anything >= x.
+template <typename Rhs>
+inline internal::GeMatcher<Rhs> Ge(Rhs x) {
+ return internal::GeMatcher<Rhs>(x);
+}
+
+// Creates a polymorphic matcher that matches anything > x.
+template <typename Rhs>
+inline internal::GtMatcher<Rhs> Gt(Rhs x) {
+ return internal::GtMatcher<Rhs>(x);
+}
+
+// Creates a polymorphic matcher that matches anything <= x.
+template <typename Rhs>
+inline internal::LeMatcher<Rhs> Le(Rhs x) {
+ return internal::LeMatcher<Rhs>(x);
+}
+
+// Creates a polymorphic matcher that matches anything < x.
+template <typename Rhs>
+inline internal::LtMatcher<Rhs> Lt(Rhs x) {
+ return internal::LtMatcher<Rhs>(x);
+}
+
+// Creates a polymorphic matcher that matches anything != x.
+template <typename Rhs>
+inline internal::NeMatcher<Rhs> Ne(Rhs x) {
+ return internal::NeMatcher<Rhs>(x);
+}
+} // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 5046
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-message.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-message.h
new file mode 100644
index 0000000000..6c8bf90009
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-message.h
@@ -0,0 +1,218 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines the Message class.
+//
+// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
+// leave some internal implementation details in this header file.
+// They are clearly marked by comments like this:
+//
+// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+//
+// Such code is NOT meant to be used by a user directly, and is subject
+// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
+// program!
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+
+#include <limits>
+#include <memory>
+#include <sstream>
+
+#include "gtest/internal/gtest-port.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// Ensures that there is at least one operator<< in the global namespace.
+// See Message& operator<<(...) below for why.
+void operator<<(const testing::internal::Secret&, int);
+
+namespace testing {
+
+// The Message class works like an ostream repeater.
+//
+// Typical usage:
+//
+// 1. You stream a bunch of values to a Message object.
+// It will remember the text in a stringstream.
+// 2. Then you stream the Message object to an ostream.
+// This causes the text in the Message to be streamed
+// to the ostream.
+//
+// For example;
+//
+// testing::Message foo;
+// foo << 1 << " != " << 2;
+// std::cout << foo;
+//
+// will print "1 != 2".
+//
+// Message is not intended to be inherited from. In particular, its
+// destructor is not virtual.
+//
+// Note that stringstream behaves differently in gcc and in MSVC. You
+// can stream a NULL char pointer to it in the former, but not in the
+// latter (it causes an access violation if you do). The Message
+// class hides this difference by treating a NULL char pointer as
+// "(null)".
+class GTEST_API_ Message {
+ private:
+ // The type of basic IO manipulators (endl, ends, and flush) for
+ // narrow streams.
+ typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);
+
+ public:
+ // Constructs an empty Message.
+ Message();
+
+ // Copy constructor.
+ Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT
+ *ss_ << msg.GetString();
+ }
+
+ // Constructs a Message from a C-string.
+ explicit Message(const char* str) : ss_(new ::std::stringstream) {
+ *ss_ << str;
+ }
+
+ // Streams a non-pointer value to this object.
+ template <typename T>
+ inline Message& operator<<(const T& val) {
+ // Some libraries overload << for STL containers. These
+ // overloads are defined in the global namespace instead of ::std.
+ //
+ // C++'s symbol lookup rule (i.e. Koenig lookup) says that these
+ // overloads are visible in either the std namespace or the global
+ // namespace, but not other namespaces, including the testing
+ // namespace which Google Test's Message class is in.
+ //
+ // To allow STL containers (and other types that has a << operator
+ // defined in the global namespace) to be used in Google Test
+ // assertions, testing::Message must access the custom << operator
+ // from the global namespace. With this using declaration,
+ // overloads of << defined in the global namespace and those
+ // visible via Koenig lookup are both exposed in this function.
+ using ::operator<<;
+ *ss_ << val;
+ return *this;
+ }
+
+ // Streams a pointer value to this object.
+ //
+ // This function is an overload of the previous one. When you
+ // stream a pointer to a Message, this definition will be used as it
+ // is more specialized. (The C++ Standard, section
+ // [temp.func.order].) If you stream a non-pointer, then the
+ // previous definition will be used.
+ //
+ // The reason for this overload is that streaming a NULL pointer to
+ // ostream is undefined behavior. Depending on the compiler, you
+ // may get "0", "(nil)", "(null)", or an access violation. To
+ // ensure consistent result across compilers, we always treat NULL
+ // as "(null)".
+ template <typename T>
+ inline Message& operator<<(T* const& pointer) { // NOLINT
+ if (pointer == nullptr) {
+ *ss_ << "(null)";
+ } else {
+ *ss_ << pointer;
+ }
+ return *this;
+ }
+
+ // Since the basic IO manipulators are overloaded for both narrow
+ // and wide streams, we have to provide this specialized definition
+ // of operator <<, even though its body is the same as the
+ // templatized version above. Without this definition, streaming
+ // endl or other basic IO manipulators to Message will confuse the
+ // compiler.
+ Message& operator<<(BasicNarrowIoManip val) {
+ *ss_ << val;
+ return *this;
+ }
+
+ // Instead of 1/0, we want to see true/false for bool values.
+ Message& operator<<(bool b) { return *this << (b ? "true" : "false"); }
+
+ // These two overloads allow streaming a wide C string to a Message
+ // using the UTF-8 encoding.
+ Message& operator<<(const wchar_t* wide_c_str);
+ Message& operator<<(wchar_t* wide_c_str);
+
+#if GTEST_HAS_STD_WSTRING
+ // Converts the given wide string to a narrow string using the UTF-8
+ // encoding, and streams the result to this Message object.
+ Message& operator<<(const ::std::wstring& wstr);
+#endif // GTEST_HAS_STD_WSTRING
+
+ // Gets the text streamed to this object so far as an std::string.
+ // Each '\0' character in the buffer is replaced with "\\0".
+ //
+ // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+ std::string GetString() const;
+
+ private:
+ // We'll hold the text streamed to this object here.
+ const std::unique_ptr< ::std::stringstream> ss_;
+
+ // We declare (but don't implement) this to prevent the compiler
+ // from implementing the assignment operator.
+ void operator=(const Message&);
+};
+
+// Streams a Message to an ostream.
+inline std::ostream& operator<<(std::ostream& os, const Message& sb) {
+ return os << sb.GetString();
+}
+
+namespace internal {
+
+// Converts a streamable value to an std::string. A NULL pointer is
+// converted to "(null)". When the input value is a ::string,
+// ::std::string, ::wstring, or ::std::wstring object, each NUL
+// character in it is replaced with "\\0".
+template <typename T>
+std::string StreamableToString(const T& streamable) {
+ return (Message() << streamable).GetString();
+}
+
+} // namespace internal
+} // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-param-test.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-param-test.h
new file mode 100644
index 0000000000..b55119ac62
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-param-test.h
@@ -0,0 +1,510 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Macros and functions for implementing parameterized tests
+// in Google C++ Testing and Mocking Framework (Google Test)
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+
+// Value-parameterized tests allow you to test your code with different
+// parameters without writing multiple copies of the same test.
+//
+// Here is how you use value-parameterized tests:
+
+#if 0
+
+// To write value-parameterized tests, first you should define a fixture
+// class. It is usually derived from testing::TestWithParam<T> (see below for
+// another inheritance scheme that's sometimes useful in more complicated
+// class hierarchies), where the type of your parameter values.
+// TestWithParam<T> is itself derived from testing::Test. T can be any
+// copyable type. If it's a raw pointer, you are responsible for managing the
+// lifespan of the pointed values.
+
+class FooTest : public ::testing::TestWithParam<const char*> {
+ // You can implement all the usual class fixture members here.
+};
+
+// Then, use the TEST_P macro to define as many parameterized tests
+// for this fixture as you want. The _P suffix is for "parameterized"
+// or "pattern", whichever you prefer to think.
+
+TEST_P(FooTest, DoesBlah) {
+ // Inside a test, access the test parameter with the GetParam() method
+ // of the TestWithParam<T> class:
+ EXPECT_TRUE(foo.Blah(GetParam()));
+ ...
+}
+
+TEST_P(FooTest, HasBlahBlah) {
+ ...
+}
+
+// Finally, you can use INSTANTIATE_TEST_SUITE_P to instantiate the test
+// case with any set of parameters you want. Google Test defines a number
+// of functions for generating test parameters. They return what we call
+// (surprise!) parameter generators. Here is a summary of them, which
+// are all in the testing namespace:
+//
+//
+// Range(begin, end [, step]) - Yields values {begin, begin+step,
+// begin+step+step, ...}. The values do not
+// include end. step defaults to 1.
+// Values(v1, v2, ..., vN) - Yields values {v1, v2, ..., vN}.
+// ValuesIn(container) - Yields values from a C-style array, an STL
+// ValuesIn(begin,end) container, or an iterator range [begin, end).
+// Bool() - Yields sequence {false, true}.
+// Combine(g1, g2, ..., gN) - Yields all combinations (the Cartesian product
+// for the math savvy) of the values generated
+// by the N generators.
+//
+// For more details, see comments at the definitions of these functions below
+// in this file.
+//
+// The following statement will instantiate tests from the FooTest test suite
+// each with parameter values "meeny", "miny", and "moe".
+
+INSTANTIATE_TEST_SUITE_P(InstantiationName,
+ FooTest,
+ Values("meeny", "miny", "moe"));
+
+// To distinguish different instances of the pattern, (yes, you
+// can instantiate it more than once) the first argument to the
+// INSTANTIATE_TEST_SUITE_P macro is a prefix that will be added to the
+// actual test suite name. Remember to pick unique prefixes for different
+// instantiations. The tests from the instantiation above will have
+// these names:
+//
+// * InstantiationName/FooTest.DoesBlah/0 for "meeny"
+// * InstantiationName/FooTest.DoesBlah/1 for "miny"
+// * InstantiationName/FooTest.DoesBlah/2 for "moe"
+// * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
+// * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
+// * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
+//
+// You can use these names in --gtest_filter.
+//
+// This statement will instantiate all tests from FooTest again, each
+// with parameter values "cat" and "dog":
+
+const char* pets[] = {"cat", "dog"};
+INSTANTIATE_TEST_SUITE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));
+
+// The tests from the instantiation above will have these names:
+//
+// * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
+// * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
+// * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
+// * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
+//
+// Please note that INSTANTIATE_TEST_SUITE_P will instantiate all tests
+// in the given test suite, whether their definitions come before or
+// AFTER the INSTANTIATE_TEST_SUITE_P statement.
+//
+// Please also note that generator expressions (including parameters to the
+// generators) are evaluated in InitGoogleTest(), after main() has started.
+// This allows the user on one hand, to adjust generator parameters in order
+// to dynamically determine a set of tests to run and on the other hand,
+// give the user a chance to inspect the generated tests with Google Test
+// reflection API before RUN_ALL_TESTS() is executed.
+//
+// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
+// for more examples.
+//
+// In the future, we plan to publish the API for defining new parameter
+// generators. But for now this interface remains part of the internal
+// implementation and is subject to change.
+//
+//
+// A parameterized test fixture must be derived from testing::Test and from
+// testing::WithParamInterface<T>, where T is the type of the parameter
+// values. Inheriting from TestWithParam<T> satisfies that requirement because
+// TestWithParam<T> inherits from both Test and WithParamInterface. In more
+// complicated hierarchies, however, it is occasionally useful to inherit
+// separately from Test and WithParamInterface. For example:
+
+class BaseTest : public ::testing::Test {
+ // You can inherit all the usual members for a non-parameterized test
+ // fixture here.
+};
+
+class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> {
+ // The usual test fixture members go here too.
+};
+
+TEST_F(BaseTest, HasFoo) {
+ // This is an ordinary non-parameterized test.
+}
+
+TEST_P(DerivedTest, DoesBlah) {
+ // GetParam works just the same here as if you inherit from TestWithParam.
+ EXPECT_TRUE(foo.Blah(GetParam()));
+}
+
+#endif // 0
+
+#include <iterator>
+#include <utility>
+
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-param-util.h"
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+
+// Functions producing parameter generators.
+//
+// Google Test uses these generators to produce parameters for value-
+// parameterized tests. When a parameterized test suite is instantiated
+// with a particular generator, Google Test creates and runs tests
+// for each element in the sequence produced by the generator.
+//
+// In the following sample, tests from test suite FooTest are instantiated
+// each three times with parameter values 3, 5, and 8:
+//
+// class FooTest : public TestWithParam<int> { ... };
+//
+// TEST_P(FooTest, TestThis) {
+// }
+// TEST_P(FooTest, TestThat) {
+// }
+// INSTANTIATE_TEST_SUITE_P(TestSequence, FooTest, Values(3, 5, 8));
+//
+
+// Range() returns generators providing sequences of values in a range.
+//
+// Synopsis:
+// Range(start, end)
+// - returns a generator producing a sequence of values {start, start+1,
+// start+2, ..., }.
+// Range(start, end, step)
+// - returns a generator producing a sequence of values {start, start+step,
+// start+step+step, ..., }.
+// Notes:
+// * The generated sequences never include end. For example, Range(1, 5)
+// returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2)
+// returns a generator producing {1, 3, 5, 7}.
+// * start and end must have the same type. That type may be any integral or
+// floating-point type or a user defined type satisfying these conditions:
+// * It must be assignable (have operator=() defined).
+// * It must have operator+() (operator+(int-compatible type) for
+// two-operand version).
+// * It must have operator<() defined.
+// Elements in the resulting sequences will also have that type.
+// * Condition start < end must be satisfied in order for resulting sequences
+// to contain any elements.
+//
+template <typename T, typename IncrementT>
+internal::ParamGenerator<T> Range(T start, T end, IncrementT step) {
+ return internal::ParamGenerator<T>(
+ new internal::RangeGenerator<T, IncrementT>(start, end, step));
+}
+
+template <typename T>
+internal::ParamGenerator<T> Range(T start, T end) {
+ return Range(start, end, 1);
+}
+
+// ValuesIn() function allows generation of tests with parameters coming from
+// a container.
+//
+// Synopsis:
+// ValuesIn(const T (&array)[N])
+// - returns a generator producing sequences with elements from
+// a C-style array.
+// ValuesIn(const Container& container)
+// - returns a generator producing sequences with elements from
+// an STL-style container.
+// ValuesIn(Iterator begin, Iterator end)
+// - returns a generator producing sequences with elements from
+// a range [begin, end) defined by a pair of STL-style iterators. These
+// iterators can also be plain C pointers.
+//
+// Please note that ValuesIn copies the values from the containers
+// passed in and keeps them to generate tests in RUN_ALL_TESTS().
+//
+// Examples:
+//
+// This instantiates tests from test suite StringTest
+// each with C-string values of "foo", "bar", and "baz":
+//
+// const char* strings[] = {"foo", "bar", "baz"};
+// INSTANTIATE_TEST_SUITE_P(StringSequence, StringTest, ValuesIn(strings));
+//
+// This instantiates tests from test suite StlStringTest
+// each with STL strings with values "a" and "b":
+//
+// ::std::vector< ::std::string> GetParameterStrings() {
+// ::std::vector< ::std::string> v;
+// v.push_back("a");
+// v.push_back("b");
+// return v;
+// }
+//
+// INSTANTIATE_TEST_SUITE_P(CharSequence,
+// StlStringTest,
+// ValuesIn(GetParameterStrings()));
+//
+//
+// This will also instantiate tests from CharTest
+// each with parameter values 'a' and 'b':
+//
+// ::std::list<char> GetParameterChars() {
+// ::std::list<char> list;
+// list.push_back('a');
+// list.push_back('b');
+// return list;
+// }
+// ::std::list<char> l = GetParameterChars();
+// INSTANTIATE_TEST_SUITE_P(CharSequence2,
+// CharTest,
+// ValuesIn(l.begin(), l.end()));
+//
+template <typename ForwardIterator>
+internal::ParamGenerator<
+ typename std::iterator_traits<ForwardIterator>::value_type>
+ValuesIn(ForwardIterator begin, ForwardIterator end) {
+ typedef typename std::iterator_traits<ForwardIterator>::value_type ParamType;
+ return internal::ParamGenerator<ParamType>(
+ new internal::ValuesInIteratorRangeGenerator<ParamType>(begin, end));
+}
+
+template <typename T, size_t N>
+internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
+ return ValuesIn(array, array + N);
+}
+
+template <class Container>
+internal::ParamGenerator<typename Container::value_type> ValuesIn(
+ const Container& container) {
+ return ValuesIn(container.begin(), container.end());
+}
+
+// Values() allows generating tests from explicitly specified list of
+// parameters.
+//
+// Synopsis:
+// Values(T v1, T v2, ..., T vN)
+// - returns a generator producing sequences with elements v1, v2, ..., vN.
+//
+// For example, this instantiates tests from test suite BarTest each
+// with values "one", "two", and "three":
+//
+// INSTANTIATE_TEST_SUITE_P(NumSequence,
+// BarTest,
+// Values("one", "two", "three"));
+//
+// This instantiates tests from test suite BazTest each with values 1, 2, 3.5.
+// The exact type of values will depend on the type of parameter in BazTest.
+//
+// INSTANTIATE_TEST_SUITE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5));
+//
+//
+template <typename... T>
+internal::ValueArray<T...> Values(T... v) {
+ return internal::ValueArray<T...>(std::move(v)...);
+}
+
+// Bool() allows generating tests with parameters in a set of (false, true).
+//
+// Synopsis:
+// Bool()
+// - returns a generator producing sequences with elements {false, true}.
+//
+// It is useful when testing code that depends on Boolean flags. Combinations
+// of multiple flags can be tested when several Bool()'s are combined using
+// Combine() function.
+//
+// In the following example all tests in the test suite FlagDependentTest
+// will be instantiated twice with parameters false and true.
+//
+// class FlagDependentTest : public testing::TestWithParam<bool> {
+// virtual void SetUp() {
+// external_flag = GetParam();
+// }
+// }
+// INSTANTIATE_TEST_SUITE_P(BoolSequence, FlagDependentTest, Bool());
+//
+inline internal::ParamGenerator<bool> Bool() { return Values(false, true); }
+
+// Combine() allows the user to combine two or more sequences to produce
+// values of a Cartesian product of those sequences' elements.
+//
+// Synopsis:
+// Combine(gen1, gen2, ..., genN)
+// - returns a generator producing sequences with elements coming from
+// the Cartesian product of elements from the sequences generated by
+// gen1, gen2, ..., genN. The sequence elements will have a type of
+// std::tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
+// of elements from sequences produces by gen1, gen2, ..., genN.
+//
+// Example:
+//
+// This will instantiate tests in test suite AnimalTest each one with
+// the parameter values tuple("cat", BLACK), tuple("cat", WHITE),
+// tuple("dog", BLACK), and tuple("dog", WHITE):
+//
+// enum Color { BLACK, GRAY, WHITE };
+// class AnimalTest
+// : public testing::TestWithParam<std::tuple<const char*, Color> > {...};
+//
+// TEST_P(AnimalTest, AnimalLooksNice) {...}
+//
+// INSTANTIATE_TEST_SUITE_P(AnimalVariations, AnimalTest,
+// Combine(Values("cat", "dog"),
+// Values(BLACK, WHITE)));
+//
+// This will instantiate tests in FlagDependentTest with all variations of two
+// Boolean flags:
+//
+// class FlagDependentTest
+// : public testing::TestWithParam<std::tuple<bool, bool> > {
+// virtual void SetUp() {
+// // Assigns external_flag_1 and external_flag_2 values from the tuple.
+// std::tie(external_flag_1, external_flag_2) = GetParam();
+// }
+// };
+//
+// TEST_P(FlagDependentTest, TestFeature1) {
+// // Test your code using external_flag_1 and external_flag_2 here.
+// }
+// INSTANTIATE_TEST_SUITE_P(TwoBoolSequence, FlagDependentTest,
+// Combine(Bool(), Bool()));
+//
+template <typename... Generator>
+internal::CartesianProductHolder<Generator...> Combine(const Generator&... g) {
+ return internal::CartesianProductHolder<Generator...>(g...);
+}
+
+#define TEST_P(test_suite_name, test_name) \
+ class GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+ : public test_suite_name { \
+ public: \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() {} \
+ void TestBody() override; \
+ \
+ private: \
+ static int AddToRegistry() { \
+ ::testing::UnitTest::GetInstance() \
+ ->parameterized_test_registry() \
+ .GetTestSuitePatternHolder<test_suite_name>( \
+ GTEST_STRINGIFY_(test_suite_name), \
+ ::testing::internal::CodeLocation(__FILE__, __LINE__)) \
+ ->AddTestPattern( \
+ GTEST_STRINGIFY_(test_suite_name), GTEST_STRINGIFY_(test_name), \
+ new ::testing::internal::TestMetaFactory<GTEST_TEST_CLASS_NAME_( \
+ test_suite_name, test_name)>(), \
+ ::testing::internal::CodeLocation(__FILE__, __LINE__)); \
+ return 0; \
+ } \
+ static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+ (const GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) &) = delete; \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) & operator=( \
+ const GTEST_TEST_CLASS_NAME_(test_suite_name, \
+ test_name) &) = delete; /* NOLINT */ \
+ }; \
+ int GTEST_TEST_CLASS_NAME_(test_suite_name, \
+ test_name)::gtest_registering_dummy_ = \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)::AddToRegistry(); \
+ void GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)::TestBody()
+
+// The last argument to INSTANTIATE_TEST_SUITE_P allows the user to specify
+// generator and an optional function or functor that generates custom test name
+// suffixes based on the test parameters. Such a function or functor should
+// accept one argument of type testing::TestParamInfo<class ParamType>, and
+// return std::string.
+//
+// testing::PrintToStringParamName is a builtin test suffix generator that
+// returns the value of testing::PrintToString(GetParam()).
+//
+// Note: test names must be non-empty, unique, and may only contain ASCII
+// alphanumeric characters or underscore. Because PrintToString adds quotes
+// to std::string and C strings, it won't work for these types.
+
+#define GTEST_EXPAND_(arg) arg
+#define GTEST_GET_FIRST_(first, ...) first
+#define GTEST_GET_SECOND_(first, second, ...) second
+
+#define INSTANTIATE_TEST_SUITE_P(prefix, test_suite_name, ...) \
+ static ::testing::internal::ParamGenerator<test_suite_name::ParamType> \
+ gtest_##prefix##test_suite_name##_EvalGenerator_() { \
+ return GTEST_EXPAND_(GTEST_GET_FIRST_(__VA_ARGS__, DUMMY_PARAM_)); \
+ } \
+ static ::std::string gtest_##prefix##test_suite_name##_EvalGenerateName_( \
+ const ::testing::TestParamInfo<test_suite_name::ParamType>& info) { \
+ if (::testing::internal::AlwaysFalse()) { \
+ ::testing::internal::TestNotEmpty(GTEST_EXPAND_(GTEST_GET_SECOND_( \
+ __VA_ARGS__, \
+ ::testing::internal::DefaultParamName<test_suite_name::ParamType>, \
+ DUMMY_PARAM_))); \
+ auto t = std::make_tuple(__VA_ARGS__); \
+ static_assert(std::tuple_size<decltype(t)>::value <= 2, \
+ "Too Many Args!"); \
+ } \
+ return ((GTEST_EXPAND_(GTEST_GET_SECOND_( \
+ __VA_ARGS__, \
+ ::testing::internal::DefaultParamName<test_suite_name::ParamType>, \
+ DUMMY_PARAM_))))(info); \
+ } \
+ static int gtest_##prefix##test_suite_name##_dummy_ \
+ GTEST_ATTRIBUTE_UNUSED_ = \
+ ::testing::UnitTest::GetInstance() \
+ ->parameterized_test_registry() \
+ .GetTestSuitePatternHolder<test_suite_name>( \
+ GTEST_STRINGIFY_(test_suite_name), \
+ ::testing::internal::CodeLocation(__FILE__, __LINE__)) \
+ ->AddTestSuiteInstantiation( \
+ GTEST_STRINGIFY_(prefix), \
+ &gtest_##prefix##test_suite_name##_EvalGenerator_, \
+ &gtest_##prefix##test_suite_name##_EvalGenerateName_, \
+ __FILE__, __LINE__)
+
+// Allow Marking a Parameterized test class as not needing to be instantiated.
+#define GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(T) \
+ namespace gtest_do_not_use_outside_namespace_scope {} \
+ static const ::testing::internal::MarkAsIgnored gtest_allow_ignore_##T( \
+ GTEST_STRINGIFY_(T))
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define INSTANTIATE_TEST_CASE_P \
+ static_assert(::testing::internal::InstantiateTestCase_P_IsDeprecated(), \
+ ""); \
+ INSTANTIATE_TEST_SUITE_P
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+} // namespace testing
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-printers.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-printers.h
new file mode 100644
index 0000000000..a91e8b8b10
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-printers.h
@@ -0,0 +1,1048 @@
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Google Test - The Google C++ Testing and Mocking Framework
+//
+// This file implements a universal value printer that can print a
+// value of any type T:
+//
+// void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
+//
+// A user can teach this function how to print a class type T by
+// defining either operator<<() or PrintTo() in the namespace that
+// defines T. More specifically, the FIRST defined function in the
+// following list will be used (assuming T is defined in namespace
+// foo):
+//
+// 1. foo::PrintTo(const T&, ostream*)
+// 2. operator<<(ostream&, const T&) defined in either foo or the
+// global namespace.
+//
+// However if T is an STL-style container then it is printed element-wise
+// unless foo::PrintTo(const T&, ostream*) is defined. Note that
+// operator<<() is ignored for container types.
+//
+// If none of the above is defined, it will print the debug string of
+// the value if it is a protocol buffer, or print the raw bytes in the
+// value otherwise.
+//
+// To aid debugging: when T is a reference type, the address of the
+// value is also printed; when T is a (const) char pointer, both the
+// pointer value and the NUL-terminated string it points to are
+// printed.
+//
+// We also provide some convenient wrappers:
+//
+// // Prints a value to a string. For a (const or not) char
+// // pointer, the NUL-terminated string (but not the pointer) is
+// // printed.
+// std::string ::testing::PrintToString(const T& value);
+//
+// // Prints a value tersely: for a reference type, the referenced
+// // value (but not the address) is printed; for a (const or not) char
+// // pointer, the NUL-terminated string (but not the pointer) is
+// // printed.
+// void ::testing::internal::UniversalTersePrint(const T& value, ostream*);
+//
+// // Prints value using the type inferred by the compiler. The difference
+// // from UniversalTersePrint() is that this function prints both the
+// // pointer and the NUL-terminated string for a (const or not) char pointer.
+// void ::testing::internal::UniversalPrint(const T& value, ostream*);
+//
+// // Prints the fields of a tuple tersely to a string vector, one
+// // element for each field. Tuple support must be enabled in
+// // gtest-port.h.
+// std::vector<string> UniversalTersePrintTupleFieldsToStrings(
+// const Tuple& value);
+//
+// Known limitation:
+//
+// The print primitives print the elements of an STL-style container
+// using the compiler-inferred type of *iter where iter is a
+// const_iterator of the container. When const_iterator is an input
+// iterator but not a forward iterator, this inferred type may not
+// match value_type, and the print output may be incorrect. In
+// practice, this is rarely a problem as for most containers
+// const_iterator is a forward iterator. We'll fix this if there's an
+// actual need for it. Note that this fix cannot rely on value_type
+// being defined as many user-defined container types don't have
+// value_type.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+
+#include <functional>
+#include <memory>
+#include <ostream> // NOLINT
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+
+// Definitions in the internal* namespaces are subject to change without notice.
+// DO NOT USE THEM IN USER CODE!
+namespace internal {
+
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os);
+
+// Used to print an STL-style container when the user doesn't define
+// a PrintTo() for it.
+struct ContainerPrinter {
+ template <typename T,
+ typename = typename std::enable_if<
+ (sizeof(IsContainerTest<T>(0)) == sizeof(IsContainer)) &&
+ !IsRecursiveContainer<T>::value>::type>
+ static void PrintValue(const T& container, std::ostream* os) {
+ const size_t kMaxCount = 32; // The maximum number of elements to print.
+ *os << '{';
+ size_t count = 0;
+ for (auto&& elem : container) {
+ if (count > 0) {
+ *os << ',';
+ if (count == kMaxCount) { // Enough has been printed.
+ *os << " ...";
+ break;
+ }
+ }
+ *os << ' ';
+ // We cannot call PrintTo(elem, os) here as PrintTo() doesn't
+ // handle `elem` being a native array.
+ internal::UniversalPrint(elem, os);
+ ++count;
+ }
+
+ if (count > 0) {
+ *os << ' ';
+ }
+ *os << '}';
+ }
+};
+
+// Used to print a pointer that is neither a char pointer nor a member
+// pointer, when the user doesn't define PrintTo() for it. (A member
+// variable pointer or member function pointer doesn't really point to
+// a location in the address space. Their representation is
+// implementation-defined. Therefore they will be printed as raw
+// bytes.)
+struct FunctionPointerPrinter {
+ template <typename T, typename = typename std::enable_if<
+ std::is_function<T>::value>::type>
+ static void PrintValue(T* p, ::std::ostream* os) {
+ if (p == nullptr) {
+ *os << "NULL";
+ } else {
+ // T is a function type, so '*os << p' doesn't do what we want
+ // (it just prints p as bool). We want to print p as a const
+ // void*.
+ *os << reinterpret_cast<const void*>(p);
+ }
+ }
+};
+
+struct PointerPrinter {
+ template <typename T>
+ static void PrintValue(T* p, ::std::ostream* os) {
+ if (p == nullptr) {
+ *os << "NULL";
+ } else {
+ // T is not a function type. We just call << to print p,
+ // relying on ADL to pick up user-defined << for their pointer
+ // types, if any.
+ *os << p;
+ }
+ }
+};
+
+namespace internal_stream_operator_without_lexical_name_lookup {
+
+// The presence of an operator<< here will terminate lexical scope lookup
+// straight away (even though it cannot be a match because of its argument
+// types). Thus, the two operator<< calls in StreamPrinter will find only ADL
+// candidates.
+struct LookupBlocker {};
+void operator<<(LookupBlocker, LookupBlocker);
+
+struct StreamPrinter {
+ template <typename T,
+ // Don't accept member pointers here. We'd print them via implicit
+ // conversion to bool, which isn't useful.
+ typename = typename std::enable_if<
+ !std::is_member_pointer<T>::value>::type,
+ // Only accept types for which we can find a streaming operator via
+ // ADL (possibly involving implicit conversions).
+ typename = decltype(std::declval<std::ostream&>()
+ << std::declval<const T&>())>
+ static void PrintValue(const T& value, ::std::ostream* os) {
+ // Call streaming operator found by ADL, possibly with implicit conversions
+ // of the arguments.
+ *os << value;
+ }
+};
+
+} // namespace internal_stream_operator_without_lexical_name_lookup
+
+struct ProtobufPrinter {
+ // We print a protobuf using its ShortDebugString() when the string
+ // doesn't exceed this many characters; otherwise we print it using
+ // DebugString() for better readability.
+ static const size_t kProtobufOneLinerMaxLength = 50;
+
+ template <typename T,
+ typename = typename std::enable_if<
+ internal::HasDebugStringAndShortDebugString<T>::value>::type>
+ static void PrintValue(const T& value, ::std::ostream* os) {
+ std::string pretty_str = value.ShortDebugString();
+ if (pretty_str.length() > kProtobufOneLinerMaxLength) {
+ pretty_str = "\n" + value.DebugString();
+ }
+ *os << ("<" + pretty_str + ">");
+ }
+};
+
+struct ConvertibleToIntegerPrinter {
+ // Since T has no << operator or PrintTo() but can be implicitly
+ // converted to BiggestInt, we print it as a BiggestInt.
+ //
+ // Most likely T is an enum type (either named or unnamed), in which
+ // case printing it as an integer is the desired behavior. In case
+ // T is not an enum, printing it as an integer is the best we can do
+ // given that it has no user-defined printer.
+ static void PrintValue(internal::BiggestInt value, ::std::ostream* os) {
+ *os << value;
+ }
+};
+
+struct ConvertibleToStringViewPrinter {
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+ static void PrintValue(internal::StringView value, ::std::ostream* os) {
+ internal::UniversalPrint(value, os);
+ }
+#endif
+};
+
+// Prints the given number of bytes in the given object to the given
+// ostream.
+GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
+ size_t count, ::std::ostream* os);
+struct RawBytesPrinter {
+ // SFINAE on `sizeof` to make sure we have a complete type.
+ template <typename T, size_t = sizeof(T)>
+ static void PrintValue(const T& value, ::std::ostream* os) {
+ PrintBytesInObjectTo(
+ static_cast<const unsigned char*>(
+ // Load bearing cast to void* to support iOS
+ reinterpret_cast<const void*>(std::addressof(value))),
+ sizeof(value), os);
+ }
+};
+
+struct FallbackPrinter {
+ template <typename T>
+ static void PrintValue(const T&, ::std::ostream* os) {
+ *os << "(incomplete type)";
+ }
+};
+
+// Try every printer in order and return the first one that works.
+template <typename T, typename E, typename Printer, typename... Printers>
+struct FindFirstPrinter : FindFirstPrinter<T, E, Printers...> {};
+
+template <typename T, typename Printer, typename... Printers>
+struct FindFirstPrinter<
+ T, decltype(Printer::PrintValue(std::declval<const T&>(), nullptr)),
+ Printer, Printers...> {
+ using type = Printer;
+};
+
+// Select the best printer in the following order:
+// - Print containers (they have begin/end/etc).
+// - Print function pointers.
+// - Print object pointers.
+// - Use the stream operator, if available.
+// - Print protocol buffers.
+// - Print types convertible to BiggestInt.
+// - Print types convertible to StringView, if available.
+// - Fallback to printing the raw bytes of the object.
+template <typename T>
+void PrintWithFallback(const T& value, ::std::ostream* os) {
+ using Printer = typename FindFirstPrinter<
+ T, void, ContainerPrinter, FunctionPointerPrinter, PointerPrinter,
+ internal_stream_operator_without_lexical_name_lookup::StreamPrinter,
+ ProtobufPrinter, ConvertibleToIntegerPrinter,
+ ConvertibleToStringViewPrinter, RawBytesPrinter, FallbackPrinter>::type;
+ Printer::PrintValue(value, os);
+}
+
+// FormatForComparison<ToPrint, OtherOperand>::Format(value) formats a
+// value of type ToPrint that is an operand of a comparison assertion
+// (e.g. ASSERT_EQ). OtherOperand is the type of the other operand in
+// the comparison, and is used to help determine the best way to
+// format the value. In particular, when the value is a C string
+// (char pointer) and the other operand is an STL string object, we
+// want to format the C string as a string, since we know it is
+// compared by value with the string object. If the value is a char
+// pointer but the other operand is not an STL string object, we don't
+// know whether the pointer is supposed to point to a NUL-terminated
+// string, and thus want to print it as a pointer to be safe.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+
+// The default case.
+template <typename ToPrint, typename OtherOperand>
+class FormatForComparison {
+ public:
+ static ::std::string Format(const ToPrint& value) {
+ return ::testing::PrintToString(value);
+ }
+};
+
+// Array.
+template <typename ToPrint, size_t N, typename OtherOperand>
+class FormatForComparison<ToPrint[N], OtherOperand> {
+ public:
+ static ::std::string Format(const ToPrint* value) {
+ return FormatForComparison<const ToPrint*, OtherOperand>::Format(value);
+ }
+};
+
+// By default, print C string as pointers to be safe, as we don't know
+// whether they actually point to a NUL-terminated string.
+
+#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType) \
+ template <typename OtherOperand> \
+ class FormatForComparison<CharType*, OtherOperand> { \
+ public: \
+ static ::std::string Format(CharType* value) { \
+ return ::testing::PrintToString(static_cast<const void*>(value)); \
+ } \
+ }
+
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t);
+#ifdef __cpp_lib_char8_t
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char8_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char8_t);
+#endif
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char16_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char16_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char32_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char32_t);
+
+#undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_
+
+// If a C string is compared with an STL string object, we know it's meant
+// to point to a NUL-terminated string, and thus can print it as a string.
+
+#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType) \
+ template <> \
+ class FormatForComparison<CharType*, OtherStringType> { \
+ public: \
+ static ::std::string Format(CharType* value) { \
+ return ::testing::PrintToString(value); \
+ } \
+ }
+
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string);
+#ifdef __cpp_char8_t
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char8_t, ::std::u8string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char8_t, ::std::u8string);
+#endif
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char16_t, ::std::u16string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char16_t, ::std::u16string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char32_t, ::std::u32string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char32_t, ::std::u32string);
+
+#if GTEST_HAS_STD_WSTRING
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring);
+#endif
+
+#undef GTEST_IMPL_FORMAT_C_STRING_AS_STRING_
+
+// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, and etc)
+// operand to be used in a failure message. The type (but not value)
+// of the other operand may affect the format. This allows us to
+// print a char* as a raw pointer when it is compared against another
+// char* or void*, and print it as a C string when it is compared
+// against an std::string object, for example.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+template <typename T1, typename T2>
+std::string FormatForComparisonFailureMessage(const T1& value,
+ const T2& /* other_operand */) {
+ return FormatForComparison<T1, T2>::Format(value);
+}
+
+// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given
+// value to the given ostream. The caller must ensure that
+// 'ostream_ptr' is not NULL, or the behavior is undefined.
+//
+// We define UniversalPrinter as a class template (as opposed to a
+// function template), as we need to partially specialize it for
+// reference types, which cannot be done with function templates.
+template <typename T>
+class UniversalPrinter;
+
+// Prints the given value using the << operator if it has one;
+// otherwise prints the bytes in it. This is what
+// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
+// or overloaded for type T.
+//
+// A user can override this behavior for a class type Foo by defining
+// an overload of PrintTo() in the namespace where Foo is defined. We
+// give the user this option as sometimes defining a << operator for
+// Foo is not desirable (e.g. the coding style may prevent doing it,
+// or there is already a << operator but it doesn't do what the user
+// wants).
+template <typename T>
+void PrintTo(const T& value, ::std::ostream* os) {
+ internal::PrintWithFallback(value, os);
+}
+
+// The following list of PrintTo() overloads tells
+// UniversalPrinter<T>::Print() how to print standard types (built-in
+// types, strings, plain arrays, and pointers).
+
+// Overloads for various char types.
+GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
+GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
+inline void PrintTo(char c, ::std::ostream* os) {
+ // When printing a plain char, we always treat it as unsigned. This
+ // way, the output won't be affected by whether the compiler thinks
+ // char is signed or not.
+ PrintTo(static_cast<unsigned char>(c), os);
+}
+
+// Overloads for other simple built-in types.
+inline void PrintTo(bool x, ::std::ostream* os) {
+ *os << (x ? "true" : "false");
+}
+
+// Overload for wchar_t type.
+// Prints a wchar_t as a symbol if it is printable or as its internal
+// code otherwise and also as its decimal code (except for L'\0').
+// The L'\0' char is printed as "L'\\0'". The decimal code is printed
+// as signed integer when wchar_t is implemented by the compiler
+// as a signed type and is printed as an unsigned integer when wchar_t
+// is implemented as an unsigned type.
+GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);
+
+GTEST_API_ void PrintTo(char32_t c, ::std::ostream* os);
+inline void PrintTo(char16_t c, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<char32_t>(c), os);
+}
+#ifdef __cpp_char8_t
+inline void PrintTo(char8_t c, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<char32_t>(c), os);
+}
+#endif
+
+// gcc/clang __{u,}int128_t
+#if defined(__SIZEOF_INT128__)
+GTEST_API_ void PrintTo(__uint128_t v, ::std::ostream* os);
+GTEST_API_ void PrintTo(__int128_t v, ::std::ostream* os);
+#endif // __SIZEOF_INT128__
+
+// Overloads for C strings.
+GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
+inline void PrintTo(char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const char*>(s), os);
+}
+
+// signed/unsigned char is often used for representing binary data, so
+// we print pointers to it as void* to be safe.
+inline void PrintTo(const signed char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(signed char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(unsigned char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
+}
+#ifdef __cpp_char8_t
+// Overloads for u8 strings.
+GTEST_API_ void PrintTo(const char8_t* s, ::std::ostream* os);
+inline void PrintTo(char8_t* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const char8_t*>(s), os);
+}
+#endif
+// Overloads for u16 strings.
+GTEST_API_ void PrintTo(const char16_t* s, ::std::ostream* os);
+inline void PrintTo(char16_t* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const char16_t*>(s), os);
+}
+// Overloads for u32 strings.
+GTEST_API_ void PrintTo(const char32_t* s, ::std::ostream* os);
+inline void PrintTo(char32_t* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const char32_t*>(s), os);
+}
+
+// MSVC can be configured to define wchar_t as a typedef of unsigned
+// short. It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
+// type. When wchar_t is a typedef, defining an overload for const
+// wchar_t* would cause unsigned short* be printed as a wide string,
+// possibly causing invalid memory accesses.
+#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
+// Overloads for wide C strings
+GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
+inline void PrintTo(wchar_t* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const wchar_t*>(s), os);
+}
+#endif
+
+// Overload for C arrays. Multi-dimensional arrays are printed
+// properly.
+
+// Prints the given number of elements in an array, without printing
+// the curly braces.
+template <typename T>
+void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
+ UniversalPrint(a[0], os);
+ for (size_t i = 1; i != count; i++) {
+ *os << ", ";
+ UniversalPrint(a[i], os);
+ }
+}
+
+// Overloads for ::std::string.
+GTEST_API_ void PrintStringTo(const ::std::string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
+ PrintStringTo(s, os);
+}
+
+// Overloads for ::std::u8string
+#ifdef __cpp_char8_t
+GTEST_API_ void PrintU8StringTo(const ::std::u8string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::u8string& s, ::std::ostream* os) {
+ PrintU8StringTo(s, os);
+}
+#endif
+
+// Overloads for ::std::u16string
+GTEST_API_ void PrintU16StringTo(const ::std::u16string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::u16string& s, ::std::ostream* os) {
+ PrintU16StringTo(s, os);
+}
+
+// Overloads for ::std::u32string
+GTEST_API_ void PrintU32StringTo(const ::std::u32string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::u32string& s, ::std::ostream* os) {
+ PrintU32StringTo(s, os);
+}
+
+// Overloads for ::std::wstring.
+#if GTEST_HAS_STD_WSTRING
+GTEST_API_ void PrintWideStringTo(const ::std::wstring& s, ::std::ostream* os);
+inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
+ PrintWideStringTo(s, os);
+}
+#endif // GTEST_HAS_STD_WSTRING
+
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+// Overload for internal::StringView.
+inline void PrintTo(internal::StringView sp, ::std::ostream* os) {
+ PrintTo(::std::string(sp), os);
+}
+#endif // GTEST_INTERNAL_HAS_STRING_VIEW
+
+inline void PrintTo(std::nullptr_t, ::std::ostream* os) { *os << "(nullptr)"; }
+
+#if GTEST_HAS_RTTI
+inline void PrintTo(const std::type_info& info, std::ostream* os) {
+ *os << internal::GetTypeName(info);
+}
+#endif // GTEST_HAS_RTTI
+
+template <typename T>
+void PrintTo(std::reference_wrapper<T> ref, ::std::ostream* os) {
+ UniversalPrinter<T&>::Print(ref.get(), os);
+}
+
+inline const void* VoidifyPointer(const void* p) { return p; }
+inline const void* VoidifyPointer(volatile const void* p) {
+ return const_cast<const void*>(p);
+}
+
+template <typename T, typename Ptr>
+void PrintSmartPointer(const Ptr& ptr, std::ostream* os, char) {
+ if (ptr == nullptr) {
+ *os << "(nullptr)";
+ } else {
+ // We can't print the value. Just print the pointer..
+ *os << "(" << (VoidifyPointer)(ptr.get()) << ")";
+ }
+}
+template <typename T, typename Ptr,
+ typename = typename std::enable_if<!std::is_void<T>::value &&
+ !std::is_array<T>::value>::type>
+void PrintSmartPointer(const Ptr& ptr, std::ostream* os, int) {
+ if (ptr == nullptr) {
+ *os << "(nullptr)";
+ } else {
+ *os << "(ptr = " << (VoidifyPointer)(ptr.get()) << ", value = ";
+ UniversalPrinter<T>::Print(*ptr, os);
+ *os << ")";
+ }
+}
+
+template <typename T, typename D>
+void PrintTo(const std::unique_ptr<T, D>& ptr, std::ostream* os) {
+ (PrintSmartPointer<T>)(ptr, os, 0);
+}
+
+template <typename T>
+void PrintTo(const std::shared_ptr<T>& ptr, std::ostream* os) {
+ (PrintSmartPointer<T>)(ptr, os, 0);
+}
+
+// Helper function for printing a tuple. T must be instantiated with
+// a tuple type.
+template <typename T>
+void PrintTupleTo(const T&, std::integral_constant<size_t, 0>,
+ ::std::ostream*) {}
+
+template <typename T, size_t I>
+void PrintTupleTo(const T& t, std::integral_constant<size_t, I>,
+ ::std::ostream* os) {
+ PrintTupleTo(t, std::integral_constant<size_t, I - 1>(), os);
+ GTEST_INTENTIONAL_CONST_COND_PUSH_()
+ if (I > 1) {
+ GTEST_INTENTIONAL_CONST_COND_POP_()
+ *os << ", ";
+ }
+ UniversalPrinter<typename std::tuple_element<I - 1, T>::type>::Print(
+ std::get<I - 1>(t), os);
+}
+
+template <typename... Types>
+void PrintTo(const ::std::tuple<Types...>& t, ::std::ostream* os) {
+ *os << "(";
+ PrintTupleTo(t, std::integral_constant<size_t, sizeof...(Types)>(), os);
+ *os << ")";
+}
+
+// Overload for std::pair.
+template <typename T1, typename T2>
+void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
+ *os << '(';
+ // We cannot use UniversalPrint(value.first, os) here, as T1 may be
+ // a reference type. The same for printing value.second.
+ UniversalPrinter<T1>::Print(value.first, os);
+ *os << ", ";
+ UniversalPrinter<T2>::Print(value.second, os);
+ *os << ')';
+}
+
+// Implements printing a non-reference type T by letting the compiler
+// pick the right overload of PrintTo() for T.
+template <typename T>
+class UniversalPrinter {
+ public:
+ // MSVC warns about adding const to a function type, so we want to
+ // disable the warning.
+ GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180)
+
+ // Note: we deliberately don't call this PrintTo(), as that name
+ // conflicts with ::testing::internal::PrintTo in the body of the
+ // function.
+ static void Print(const T& value, ::std::ostream* os) {
+ // By default, ::testing::internal::PrintTo() is used for printing
+ // the value.
+ //
+ // Thanks to Koenig look-up, if T is a class and has its own
+ // PrintTo() function defined in its namespace, that function will
+ // be visible here. Since it is more specific than the generic ones
+ // in ::testing::internal, it will be picked by the compiler in the
+ // following statement - exactly what we want.
+ PrintTo(value, os);
+ }
+
+ GTEST_DISABLE_MSC_WARNINGS_POP_()
+};
+
+// Remove any const-qualifiers before passing a type to UniversalPrinter.
+template <typename T>
+class UniversalPrinter<const T> : public UniversalPrinter<T> {};
+
+#if GTEST_INTERNAL_HAS_ANY
+
+// Printer for std::any / absl::any
+
+template <>
+class UniversalPrinter<Any> {
+ public:
+ static void Print(const Any& value, ::std::ostream* os) {
+ if (value.has_value()) {
+ *os << "value of type " << GetTypeName(value);
+ } else {
+ *os << "no value";
+ }
+ }
+
+ private:
+ static std::string GetTypeName(const Any& value) {
+#if GTEST_HAS_RTTI
+ return internal::GetTypeName(value.type());
+#else
+ static_cast<void>(value); // possibly unused
+ return "<unknown_type>";
+#endif // GTEST_HAS_RTTI
+ }
+};
+
+#endif // GTEST_INTERNAL_HAS_ANY
+
+#if GTEST_INTERNAL_HAS_OPTIONAL
+
+// Printer for std::optional / absl::optional
+
+template <typename T>
+class UniversalPrinter<Optional<T>> {
+ public:
+ static void Print(const Optional<T>& value, ::std::ostream* os) {
+ *os << '(';
+ if (!value) {
+ *os << "nullopt";
+ } else {
+ UniversalPrint(*value, os);
+ }
+ *os << ')';
+ }
+};
+
+template <>
+class UniversalPrinter<decltype(Nullopt())> {
+ public:
+ static void Print(decltype(Nullopt()), ::std::ostream* os) {
+ *os << "(nullopt)";
+ }
+};
+
+#endif // GTEST_INTERNAL_HAS_OPTIONAL
+
+#if GTEST_INTERNAL_HAS_VARIANT
+
+// Printer for std::variant / absl::variant
+
+template <typename... T>
+class UniversalPrinter<Variant<T...>> {
+ public:
+ static void Print(const Variant<T...>& value, ::std::ostream* os) {
+ *os << '(';
+#if GTEST_HAS_ABSL
+ absl::visit(Visitor{os, value.index()}, value);
+#else
+ std::visit(Visitor{os, value.index()}, value);
+#endif // GTEST_HAS_ABSL
+ *os << ')';
+ }
+
+ private:
+ struct Visitor {
+ template <typename U>
+ void operator()(const U& u) const {
+ *os << "'" << GetTypeName<U>() << "(index = " << index
+ << ")' with value ";
+ UniversalPrint(u, os);
+ }
+ ::std::ostream* os;
+ std::size_t index;
+ };
+};
+
+#endif // GTEST_INTERNAL_HAS_VARIANT
+
+// UniversalPrintArray(begin, len, os) prints an array of 'len'
+// elements, starting at address 'begin'.
+template <typename T>
+void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
+ if (len == 0) {
+ *os << "{}";
+ } else {
+ *os << "{ ";
+ const size_t kThreshold = 18;
+ const size_t kChunkSize = 8;
+ // If the array has more than kThreshold elements, we'll have to
+ // omit some details by printing only the first and the last
+ // kChunkSize elements.
+ if (len <= kThreshold) {
+ PrintRawArrayTo(begin, len, os);
+ } else {
+ PrintRawArrayTo(begin, kChunkSize, os);
+ *os << ", ..., ";
+ PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os);
+ }
+ *os << " }";
+ }
+}
+// This overload prints a (const) char array compactly.
+GTEST_API_ void UniversalPrintArray(const char* begin, size_t len,
+ ::std::ostream* os);
+
+#ifdef __cpp_char8_t
+// This overload prints a (const) char8_t array compactly.
+GTEST_API_ void UniversalPrintArray(const char8_t* begin, size_t len,
+ ::std::ostream* os);
+#endif
+
+// This overload prints a (const) char16_t array compactly.
+GTEST_API_ void UniversalPrintArray(const char16_t* begin, size_t len,
+ ::std::ostream* os);
+
+// This overload prints a (const) char32_t array compactly.
+GTEST_API_ void UniversalPrintArray(const char32_t* begin, size_t len,
+ ::std::ostream* os);
+
+// This overload prints a (const) wchar_t array compactly.
+GTEST_API_ void UniversalPrintArray(const wchar_t* begin, size_t len,
+ ::std::ostream* os);
+
+// Implements printing an array type T[N].
+template <typename T, size_t N>
+class UniversalPrinter<T[N]> {
+ public:
+ // Prints the given array, omitting some elements when there are too
+ // many.
+ static void Print(const T (&a)[N], ::std::ostream* os) {
+ UniversalPrintArray(a, N, os);
+ }
+};
+
+// Implements printing a reference type T&.
+template <typename T>
+class UniversalPrinter<T&> {
+ public:
+ // MSVC warns about adding const to a function type, so we want to
+ // disable the warning.
+ GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180)
+
+ static void Print(const T& value, ::std::ostream* os) {
+ // Prints the address of the value. We use reinterpret_cast here
+ // as static_cast doesn't compile when T is a function type.
+ *os << "@" << reinterpret_cast<const void*>(&value) << " ";
+
+ // Then prints the value itself.
+ UniversalPrint(value, os);
+ }
+
+ GTEST_DISABLE_MSC_WARNINGS_POP_()
+};
+
+// Prints a value tersely: for a reference type, the referenced value
+// (but not the address) is printed; for a (const) char pointer, the
+// NUL-terminated string (but not the pointer) is printed.
+
+template <typename T>
+class UniversalTersePrinter {
+ public:
+ static void Print(const T& value, ::std::ostream* os) {
+ UniversalPrint(value, os);
+ }
+};
+template <typename T>
+class UniversalTersePrinter<T&> {
+ public:
+ static void Print(const T& value, ::std::ostream* os) {
+ UniversalPrint(value, os);
+ }
+};
+template <typename T, size_t N>
+class UniversalTersePrinter<T[N]> {
+ public:
+ static void Print(const T (&value)[N], ::std::ostream* os) {
+ UniversalPrinter<T[N]>::Print(value, os);
+ }
+};
+template <>
+class UniversalTersePrinter<const char*> {
+ public:
+ static void Print(const char* str, ::std::ostream* os) {
+ if (str == nullptr) {
+ *os << "NULL";
+ } else {
+ UniversalPrint(std::string(str), os);
+ }
+ }
+};
+template <>
+class UniversalTersePrinter<char*> : public UniversalTersePrinter<const char*> {
+};
+
+#ifdef __cpp_char8_t
+template <>
+class UniversalTersePrinter<const char8_t*> {
+ public:
+ static void Print(const char8_t* str, ::std::ostream* os) {
+ if (str == nullptr) {
+ *os << "NULL";
+ } else {
+ UniversalPrint(::std::u8string(str), os);
+ }
+ }
+};
+template <>
+class UniversalTersePrinter<char8_t*>
+ : public UniversalTersePrinter<const char8_t*> {};
+#endif
+
+template <>
+class UniversalTersePrinter<const char16_t*> {
+ public:
+ static void Print(const char16_t* str, ::std::ostream* os) {
+ if (str == nullptr) {
+ *os << "NULL";
+ } else {
+ UniversalPrint(::std::u16string(str), os);
+ }
+ }
+};
+template <>
+class UniversalTersePrinter<char16_t*>
+ : public UniversalTersePrinter<const char16_t*> {};
+
+template <>
+class UniversalTersePrinter<const char32_t*> {
+ public:
+ static void Print(const char32_t* str, ::std::ostream* os) {
+ if (str == nullptr) {
+ *os << "NULL";
+ } else {
+ UniversalPrint(::std::u32string(str), os);
+ }
+ }
+};
+template <>
+class UniversalTersePrinter<char32_t*>
+ : public UniversalTersePrinter<const char32_t*> {};
+
+#if GTEST_HAS_STD_WSTRING
+template <>
+class UniversalTersePrinter<const wchar_t*> {
+ public:
+ static void Print(const wchar_t* str, ::std::ostream* os) {
+ if (str == nullptr) {
+ *os << "NULL";
+ } else {
+ UniversalPrint(::std::wstring(str), os);
+ }
+ }
+};
+#endif
+
+template <>
+class UniversalTersePrinter<wchar_t*> {
+ public:
+ static void Print(wchar_t* str, ::std::ostream* os) {
+ UniversalTersePrinter<const wchar_t*>::Print(str, os);
+ }
+};
+
+template <typename T>
+void UniversalTersePrint(const T& value, ::std::ostream* os) {
+ UniversalTersePrinter<T>::Print(value, os);
+}
+
+// Prints a value using the type inferred by the compiler. The
+// difference between this and UniversalTersePrint() is that for a
+// (const) char pointer, this prints both the pointer and the
+// NUL-terminated string.
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os) {
+ // A workarond for the bug in VC++ 7.1 that prevents us from instantiating
+ // UniversalPrinter with T directly.
+ typedef T T1;
+ UniversalPrinter<T1>::Print(value, os);
+}
+
+typedef ::std::vector<::std::string> Strings;
+
+// Tersely prints the first N fields of a tuple to a string vector,
+// one element for each field.
+template <typename Tuple>
+void TersePrintPrefixToStrings(const Tuple&, std::integral_constant<size_t, 0>,
+ Strings*) {}
+template <typename Tuple, size_t I>
+void TersePrintPrefixToStrings(const Tuple& t,
+ std::integral_constant<size_t, I>,
+ Strings* strings) {
+ TersePrintPrefixToStrings(t, std::integral_constant<size_t, I - 1>(),
+ strings);
+ ::std::stringstream ss;
+ UniversalTersePrint(std::get<I - 1>(t), &ss);
+ strings->push_back(ss.str());
+}
+
+// Prints the fields of a tuple tersely to a string vector, one
+// element for each field. See the comment before
+// UniversalTersePrint() for how we define "tersely".
+template <typename Tuple>
+Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
+ Strings result;
+ TersePrintPrefixToStrings(
+ value, std::integral_constant<size_t, std::tuple_size<Tuple>::value>(),
+ &result);
+ return result;
+}
+
+} // namespace internal
+
+template <typename T>
+::std::string PrintToString(const T& value) {
+ ::std::stringstream ss;
+ internal::UniversalTersePrinter<T>::Print(value, &ss);
+ return ss.str();
+}
+
+} // namespace testing
+
+// Include any custom printer added by the local installation.
+// We must include this header at the end to make sure it can use the
+// declarations from this file.
+#include "gtest/internal/custom/gtest-printers.h"
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-spi.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-spi.h
new file mode 100644
index 0000000000..bec8c4810b
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-spi.h
@@ -0,0 +1,248 @@
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Utilities for testing Google Test itself and code that uses Google Test
+// (e.g. frameworks built on top of Google Test).
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
+
+#include "gtest/gtest.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+namespace testing {
+
+// This helper class can be used to mock out Google Test failure reporting
+// so that we can test Google Test or code that builds on Google Test.
+//
+// An object of this class appends a TestPartResult object to the
+// TestPartResultArray object given in the constructor whenever a Google Test
+// failure is reported. It can either intercept only failures that are
+// generated in the same thread that created this object or it can intercept
+// all generated failures. The scope of this mock object can be controlled with
+// the second argument to the two arguments constructor.
+class GTEST_API_ ScopedFakeTestPartResultReporter
+ : public TestPartResultReporterInterface {
+ public:
+ // The two possible mocking modes of this object.
+ enum InterceptMode {
+ INTERCEPT_ONLY_CURRENT_THREAD, // Intercepts only thread local failures.
+ INTERCEPT_ALL_THREADS // Intercepts all failures.
+ };
+
+ // The c'tor sets this object as the test part result reporter used
+ // by Google Test. The 'result' parameter specifies where to report the
+ // results. This reporter will only catch failures generated in the current
+ // thread. DEPRECATED
+ explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result);
+
+ // Same as above, but you can choose the interception scope of this object.
+ ScopedFakeTestPartResultReporter(InterceptMode intercept_mode,
+ TestPartResultArray* result);
+
+ // The d'tor restores the previous test part result reporter.
+ ~ScopedFakeTestPartResultReporter() override;
+
+ // Appends the TestPartResult object to the TestPartResultArray
+ // received in the constructor.
+ //
+ // This method is from the TestPartResultReporterInterface
+ // interface.
+ void ReportTestPartResult(const TestPartResult& result) override;
+
+ private:
+ void Init();
+
+ const InterceptMode intercept_mode_;
+ TestPartResultReporterInterface* old_reporter_;
+ TestPartResultArray* const result_;
+
+ ScopedFakeTestPartResultReporter(const ScopedFakeTestPartResultReporter&) =
+ delete;
+ ScopedFakeTestPartResultReporter& operator=(
+ const ScopedFakeTestPartResultReporter&) = delete;
+};
+
+namespace internal {
+
+// A helper class for implementing EXPECT_FATAL_FAILURE() and
+// EXPECT_NONFATAL_FAILURE(). Its destructor verifies that the given
+// TestPartResultArray contains exactly one failure that has the given
+// type and contains the given substring. If that's not the case, a
+// non-fatal failure will be generated.
+class GTEST_API_ SingleFailureChecker {
+ public:
+ // The constructor remembers the arguments.
+ SingleFailureChecker(const TestPartResultArray* results,
+ TestPartResult::Type type, const std::string& substr);
+ ~SingleFailureChecker();
+
+ private:
+ const TestPartResultArray* const results_;
+ const TestPartResult::Type type_;
+ const std::string substr_;
+
+ SingleFailureChecker(const SingleFailureChecker&) = delete;
+ SingleFailureChecker& operator=(const SingleFailureChecker&) = delete;
+};
+
+} // namespace internal
+
+} // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+
+// A set of macros for testing Google Test assertions or code that's expected
+// to generate Google Test fatal failures (e.g. a failure from an ASSERT_EQ, but
+// not a non-fatal failure, as from EXPECT_EQ). It verifies that the given
+// statement will cause exactly one fatal Google Test failure with 'substr'
+// being part of the failure message.
+//
+// There are two different versions of this macro. EXPECT_FATAL_FAILURE only
+// affects and considers failures generated in the current thread and
+// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
+//
+// The verification of the assertion is done correctly even when the statement
+// throws an exception or aborts the current function.
+//
+// Known restrictions:
+// - 'statement' cannot reference local non-static variables or
+// non-static members of the current object.
+// - 'statement' cannot return a value.
+// - You cannot stream a failure message to this macro.
+//
+// Note that even though the implementations of the following two
+// macros are much alike, we cannot refactor them to use a common
+// helper macro, due to some peculiarity in how the preprocessor
+// works. The AcceptsMacroThatExpandsToUnprotectedComma test in
+// gtest_unittest.cc will fail to compile if we do that.
+#define EXPECT_FATAL_FAILURE(statement, substr) \
+ do { \
+ class GTestExpectFatalFailureHelper { \
+ public: \
+ static void Execute() { statement; } \
+ }; \
+ ::testing::TestPartResultArray gtest_failures; \
+ ::testing::internal::SingleFailureChecker gtest_checker( \
+ &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr)); \
+ { \
+ ::testing::ScopedFakeTestPartResultReporter gtest_reporter( \
+ ::testing::ScopedFakeTestPartResultReporter:: \
+ INTERCEPT_ONLY_CURRENT_THREAD, \
+ &gtest_failures); \
+ GTestExpectFatalFailureHelper::Execute(); \
+ } \
+ } while (::testing::internal::AlwaysFalse())
+
+#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
+ do { \
+ class GTestExpectFatalFailureHelper { \
+ public: \
+ static void Execute() { statement; } \
+ }; \
+ ::testing::TestPartResultArray gtest_failures; \
+ ::testing::internal::SingleFailureChecker gtest_checker( \
+ &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr)); \
+ { \
+ ::testing::ScopedFakeTestPartResultReporter gtest_reporter( \
+ ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \
+ &gtest_failures); \
+ GTestExpectFatalFailureHelper::Execute(); \
+ } \
+ } while (::testing::internal::AlwaysFalse())
+
+// A macro for testing Google Test assertions or code that's expected to
+// generate Google Test non-fatal failures (e.g. a failure from an EXPECT_EQ,
+// but not from an ASSERT_EQ). It asserts that the given statement will cause
+// exactly one non-fatal Google Test failure with 'substr' being part of the
+// failure message.
+//
+// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only
+// affects and considers failures generated in the current thread and
+// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
+//
+// 'statement' is allowed to reference local variables and members of
+// the current object.
+//
+// The verification of the assertion is done correctly even when the statement
+// throws an exception or aborts the current function.
+//
+// Known restrictions:
+// - You cannot stream a failure message to this macro.
+//
+// Note that even though the implementations of the following two
+// macros are much alike, we cannot refactor them to use a common
+// helper macro, due to some peculiarity in how the preprocessor
+// works. If we do that, the code won't compile when the user gives
+// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that
+// expands to code containing an unprotected comma. The
+// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc
+// catches that.
+//
+// For the same reason, we have to write
+// if (::testing::internal::AlwaysTrue()) { statement; }
+// instead of
+// GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
+// to avoid an MSVC warning on unreachable code.
+#define EXPECT_NONFATAL_FAILURE(statement, substr) \
+ do { \
+ ::testing::TestPartResultArray gtest_failures; \
+ ::testing::internal::SingleFailureChecker gtest_checker( \
+ &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
+ (substr)); \
+ { \
+ ::testing::ScopedFakeTestPartResultReporter gtest_reporter( \
+ ::testing::ScopedFakeTestPartResultReporter:: \
+ INTERCEPT_ONLY_CURRENT_THREAD, \
+ &gtest_failures); \
+ if (::testing::internal::AlwaysTrue()) { \
+ statement; \
+ } \
+ } \
+ } while (::testing::internal::AlwaysFalse())
+
+#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
+ do { \
+ ::testing::TestPartResultArray gtest_failures; \
+ ::testing::internal::SingleFailureChecker gtest_checker( \
+ &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
+ (substr)); \
+ { \
+ ::testing::ScopedFakeTestPartResultReporter gtest_reporter( \
+ ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \
+ &gtest_failures); \
+ if (::testing::internal::AlwaysTrue()) { \
+ statement; \
+ } \
+ } \
+ } while (::testing::internal::AlwaysFalse())
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-test-part.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-test-part.h
new file mode 100644
index 0000000000..09cc8c34f0
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-test-part.h
@@ -0,0 +1,190 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+
+#include <iosfwd>
+#include <vector>
+
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-string.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+namespace testing {
+
+// A copyable object representing the result of a test part (i.e. an
+// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCESS()).
+//
+// Don't inherit from TestPartResult as its destructor is not virtual.
+class GTEST_API_ TestPartResult {
+ public:
+ // The possible outcomes of a test part (i.e. an assertion or an
+ // explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
+ enum Type {
+ kSuccess, // Succeeded.
+ kNonFatalFailure, // Failed but the test can continue.
+ kFatalFailure, // Failed and the test should be terminated.
+ kSkip // Skipped.
+ };
+
+ // C'tor. TestPartResult does NOT have a default constructor.
+ // Always use this constructor (with parameters) to create a
+ // TestPartResult object.
+ TestPartResult(Type a_type, const char* a_file_name, int a_line_number,
+ const char* a_message)
+ : type_(a_type),
+ file_name_(a_file_name == nullptr ? "" : a_file_name),
+ line_number_(a_line_number),
+ summary_(ExtractSummary(a_message)),
+ message_(a_message) {}
+
+ // Gets the outcome of the test part.
+ Type type() const { return type_; }
+
+ // Gets the name of the source file where the test part took place, or
+ // NULL if it's unknown.
+ const char* file_name() const {
+ return file_name_.empty() ? nullptr : file_name_.c_str();
+ }
+
+ // Gets the line in the source file where the test part took place,
+ // or -1 if it's unknown.
+ int line_number() const { return line_number_; }
+
+ // Gets the summary of the failure message.
+ const char* summary() const { return summary_.c_str(); }
+
+ // Gets the message associated with the test part.
+ const char* message() const { return message_.c_str(); }
+
+ // Returns true if and only if the test part was skipped.
+ bool skipped() const { return type_ == kSkip; }
+
+ // Returns true if and only if the test part passed.
+ bool passed() const { return type_ == kSuccess; }
+
+ // Returns true if and only if the test part non-fatally failed.
+ bool nonfatally_failed() const { return type_ == kNonFatalFailure; }
+
+ // Returns true if and only if the test part fatally failed.
+ bool fatally_failed() const { return type_ == kFatalFailure; }
+
+ // Returns true if and only if the test part failed.
+ bool failed() const { return fatally_failed() || nonfatally_failed(); }
+
+ private:
+ Type type_;
+
+ // Gets the summary of the failure message by omitting the stack
+ // trace in it.
+ static std::string ExtractSummary(const char* message);
+
+ // The name of the source file where the test part took place, or
+ // "" if the source file is unknown.
+ std::string file_name_;
+ // The line in the source file where the test part took place, or -1
+ // if the line number is unknown.
+ int line_number_;
+ std::string summary_; // The test failure summary.
+ std::string message_; // The test failure message.
+};
+
+// Prints a TestPartResult object.
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result);
+
+// An array of TestPartResult objects.
+//
+// Don't inherit from TestPartResultArray as its destructor is not
+// virtual.
+class GTEST_API_ TestPartResultArray {
+ public:
+ TestPartResultArray() {}
+
+ // Appends the given TestPartResult to the array.
+ void Append(const TestPartResult& result);
+
+ // Returns the TestPartResult at the given index (0-based).
+ const TestPartResult& GetTestPartResult(int index) const;
+
+ // Returns the number of TestPartResult objects in the array.
+ int size() const;
+
+ private:
+ std::vector<TestPartResult> array_;
+
+ TestPartResultArray(const TestPartResultArray&) = delete;
+ TestPartResultArray& operator=(const TestPartResultArray&) = delete;
+};
+
+// This interface knows how to report a test part result.
+class GTEST_API_ TestPartResultReporterInterface {
+ public:
+ virtual ~TestPartResultReporterInterface() {}
+
+ virtual void ReportTestPartResult(const TestPartResult& result) = 0;
+};
+
+namespace internal {
+
+// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a
+// statement generates new fatal failures. To do so it registers itself as the
+// current test part result reporter. Besides checking if fatal failures were
+// reported, it only delegates the reporting to the former result reporter.
+// The original result reporter is restored in the destructor.
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+class GTEST_API_ HasNewFatalFailureHelper
+ : public TestPartResultReporterInterface {
+ public:
+ HasNewFatalFailureHelper();
+ ~HasNewFatalFailureHelper() override;
+ void ReportTestPartResult(const TestPartResult& result) override;
+ bool has_new_fatal_failure() const { return has_new_fatal_failure_; }
+
+ private:
+ bool has_new_fatal_failure_;
+ TestPartResultReporterInterface* original_reporter_;
+
+ HasNewFatalFailureHelper(const HasNewFatalFailureHelper&) = delete;
+ HasNewFatalFailureHelper& operator=(const HasNewFatalFailureHelper&) = delete;
+};
+
+} // namespace internal
+
+} // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-typed-test.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-typed-test.h
new file mode 100644
index 0000000000..bd35a32660
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-typed-test.h
@@ -0,0 +1,331 @@
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+
+// This header implements typed tests and type-parameterized tests.
+
+// Typed (aka type-driven) tests repeat the same test for types in a
+// list. You must know which types you want to test with when writing
+// typed tests. Here's how you do it:
+
+#if 0
+
+// First, define a fixture class template. It should be parameterized
+// by a type. Remember to derive it from testing::Test.
+template <typename T>
+class FooTest : public testing::Test {
+ public:
+ ...
+ typedef std::list<T> List;
+ static T shared_;
+ T value_;
+};
+
+// Next, associate a list of types with the test suite, which will be
+// repeated for each type in the list. The typedef is necessary for
+// the macro to parse correctly.
+typedef testing::Types<char, int, unsigned int> MyTypes;
+TYPED_TEST_SUITE(FooTest, MyTypes);
+
+// If the type list contains only one type, you can write that type
+// directly without Types<...>:
+// TYPED_TEST_SUITE(FooTest, int);
+
+// Then, use TYPED_TEST() instead of TEST_F() to define as many typed
+// tests for this test suite as you want.
+TYPED_TEST(FooTest, DoesBlah) {
+ // Inside a test, refer to the special name TypeParam to get the type
+ // parameter. Since we are inside a derived class template, C++ requires
+ // us to visit the members of FooTest via 'this'.
+ TypeParam n = this->value_;
+
+ // To visit static members of the fixture, add the TestFixture::
+ // prefix.
+ n += TestFixture::shared_;
+
+ // To refer to typedefs in the fixture, add the "typename
+ // TestFixture::" prefix.
+ typename TestFixture::List values;
+ values.push_back(n);
+ ...
+}
+
+TYPED_TEST(FooTest, HasPropertyA) { ... }
+
+// TYPED_TEST_SUITE takes an optional third argument which allows to specify a
+// class that generates custom test name suffixes based on the type. This should
+// be a class which has a static template function GetName(int index) returning
+// a string for each type. The provided integer index equals the index of the
+// type in the provided type list. In many cases the index can be ignored.
+//
+// For example:
+// class MyTypeNames {
+// public:
+// template <typename T>
+// static std::string GetName(int) {
+// if (std::is_same<T, char>()) return "char";
+// if (std::is_same<T, int>()) return "int";
+// if (std::is_same<T, unsigned int>()) return "unsignedInt";
+// }
+// };
+// TYPED_TEST_SUITE(FooTest, MyTypes, MyTypeNames);
+
+#endif // 0
+
+// Type-parameterized tests are abstract test patterns parameterized
+// by a type. Compared with typed tests, type-parameterized tests
+// allow you to define the test pattern without knowing what the type
+// parameters are. The defined pattern can be instantiated with
+// different types any number of times, in any number of translation
+// units.
+//
+// If you are designing an interface or concept, you can define a
+// suite of type-parameterized tests to verify properties that any
+// valid implementation of the interface/concept should have. Then,
+// each implementation can easily instantiate the test suite to verify
+// that it conforms to the requirements, without having to write
+// similar tests repeatedly. Here's an example:
+
+#if 0
+
+// First, define a fixture class template. It should be parameterized
+// by a type. Remember to derive it from testing::Test.
+template <typename T>
+class FooTest : public testing::Test {
+ ...
+};
+
+// Next, declare that you will define a type-parameterized test suite
+// (the _P suffix is for "parameterized" or "pattern", whichever you
+// prefer):
+TYPED_TEST_SUITE_P(FooTest);
+
+// Then, use TYPED_TEST_P() to define as many type-parameterized tests
+// for this type-parameterized test suite as you want.
+TYPED_TEST_P(FooTest, DoesBlah) {
+ // Inside a test, refer to TypeParam to get the type parameter.
+ TypeParam n = 0;
+ ...
+}
+
+TYPED_TEST_P(FooTest, HasPropertyA) { ... }
+
+// Now the tricky part: you need to register all test patterns before
+// you can instantiate them. The first argument of the macro is the
+// test suite name; the rest are the names of the tests in this test
+// case.
+REGISTER_TYPED_TEST_SUITE_P(FooTest,
+ DoesBlah, HasPropertyA);
+
+// Finally, you are free to instantiate the pattern with the types you
+// want. If you put the above code in a header file, you can #include
+// it in multiple C++ source files and instantiate it multiple times.
+//
+// To distinguish different instances of the pattern, the first
+// argument to the INSTANTIATE_* macro is a prefix that will be added
+// to the actual test suite name. Remember to pick unique prefixes for
+// different instances.
+typedef testing::Types<char, int, unsigned int> MyTypes;
+INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes);
+
+// If the type list contains only one type, you can write that type
+// directly without Types<...>:
+// INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, int);
+//
+// Similar to the optional argument of TYPED_TEST_SUITE above,
+// INSTANTIATE_TEST_SUITE_P takes an optional fourth argument which allows to
+// generate custom names.
+// INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes, MyTypeNames);
+
+#endif // 0
+
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
+#include "gtest/internal/gtest-type-util.h"
+
+// Implements typed tests.
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the name of the typedef for the type parameters of the
+// given test suite.
+#define GTEST_TYPE_PARAMS_(TestSuiteName) gtest_type_params_##TestSuiteName##_
+
+// Expands to the name of the typedef for the NameGenerator, responsible for
+// creating the suffixes of the name.
+#define GTEST_NAME_GENERATOR_(TestSuiteName) \
+ gtest_type_params_##TestSuiteName##_NameGenerator
+
+#define TYPED_TEST_SUITE(CaseName, Types, ...) \
+ typedef ::testing::internal::GenerateTypeList<Types>::type \
+ GTEST_TYPE_PARAMS_(CaseName); \
+ typedef ::testing::internal::NameGeneratorSelector<__VA_ARGS__>::type \
+ GTEST_NAME_GENERATOR_(CaseName)
+
+#define TYPED_TEST(CaseName, TestName) \
+ static_assert(sizeof(GTEST_STRINGIFY_(TestName)) > 1, \
+ "test-name must not be empty"); \
+ template <typename gtest_TypeParam_> \
+ class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \
+ : public CaseName<gtest_TypeParam_> { \
+ private: \
+ typedef CaseName<gtest_TypeParam_> TestFixture; \
+ typedef gtest_TypeParam_ TypeParam; \
+ void TestBody() override; \
+ }; \
+ static bool gtest_##CaseName##_##TestName##_registered_ \
+ GTEST_ATTRIBUTE_UNUSED_ = ::testing::internal::TypeParameterizedTest< \
+ CaseName, \
+ ::testing::internal::TemplateSel<GTEST_TEST_CLASS_NAME_(CaseName, \
+ TestName)>, \
+ GTEST_TYPE_PARAMS_( \
+ CaseName)>::Register("", \
+ ::testing::internal::CodeLocation( \
+ __FILE__, __LINE__), \
+ GTEST_STRINGIFY_(CaseName), \
+ GTEST_STRINGIFY_(TestName), 0, \
+ ::testing::internal::GenerateNames< \
+ GTEST_NAME_GENERATOR_(CaseName), \
+ GTEST_TYPE_PARAMS_(CaseName)>()); \
+ template <typename gtest_TypeParam_> \
+ void GTEST_TEST_CLASS_NAME_(CaseName, \
+ TestName)<gtest_TypeParam_>::TestBody()
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define TYPED_TEST_CASE \
+ static_assert(::testing::internal::TypedTestCaseIsDeprecated(), ""); \
+ TYPED_TEST_SUITE
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+// Implements type-parameterized tests.
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the namespace name that the type-parameterized tests for
+// the given type-parameterized test suite are defined in. The exact
+// name of the namespace is subject to change without notice.
+#define GTEST_SUITE_NAMESPACE_(TestSuiteName) gtest_suite_##TestSuiteName##_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the name of the variable used to remember the names of
+// the defined tests in the given test suite.
+#define GTEST_TYPED_TEST_SUITE_P_STATE_(TestSuiteName) \
+ gtest_typed_test_suite_p_state_##TestSuiteName##_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY.
+//
+// Expands to the name of the variable used to remember the names of
+// the registered tests in the given test suite.
+#define GTEST_REGISTERED_TEST_NAMES_(TestSuiteName) \
+ gtest_registered_test_names_##TestSuiteName##_
+
+// The variables defined in the type-parameterized test macros are
+// static as typically these macros are used in a .h file that can be
+// #included in multiple translation units linked together.
+#define TYPED_TEST_SUITE_P(SuiteName) \
+ static ::testing::internal::TypedTestSuitePState \
+ GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName)
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define TYPED_TEST_CASE_P \
+ static_assert(::testing::internal::TypedTestCase_P_IsDeprecated(), ""); \
+ TYPED_TEST_SUITE_P
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+#define TYPED_TEST_P(SuiteName, TestName) \
+ namespace GTEST_SUITE_NAMESPACE_(SuiteName) { \
+ template <typename gtest_TypeParam_> \
+ class TestName : public SuiteName<gtest_TypeParam_> { \
+ private: \
+ typedef SuiteName<gtest_TypeParam_> TestFixture; \
+ typedef gtest_TypeParam_ TypeParam; \
+ void TestBody() override; \
+ }; \
+ static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
+ GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).AddTestName( \
+ __FILE__, __LINE__, GTEST_STRINGIFY_(SuiteName), \
+ GTEST_STRINGIFY_(TestName)); \
+ } \
+ template <typename gtest_TypeParam_> \
+ void GTEST_SUITE_NAMESPACE_( \
+ SuiteName)::TestName<gtest_TypeParam_>::TestBody()
+
+// Note: this won't work correctly if the trailing arguments are macros.
+#define REGISTER_TYPED_TEST_SUITE_P(SuiteName, ...) \
+ namespace GTEST_SUITE_NAMESPACE_(SuiteName) { \
+ typedef ::testing::internal::Templates<__VA_ARGS__> gtest_AllTests_; \
+ } \
+ static const char* const GTEST_REGISTERED_TEST_NAMES_( \
+ SuiteName) GTEST_ATTRIBUTE_UNUSED_ = \
+ GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).VerifyRegisteredTestNames( \
+ GTEST_STRINGIFY_(SuiteName), __FILE__, __LINE__, #__VA_ARGS__)
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define REGISTER_TYPED_TEST_CASE_P \
+ static_assert(::testing::internal::RegisterTypedTestCase_P_IsDeprecated(), \
+ ""); \
+ REGISTER_TYPED_TEST_SUITE_P
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+#define INSTANTIATE_TYPED_TEST_SUITE_P(Prefix, SuiteName, Types, ...) \
+ static_assert(sizeof(GTEST_STRINGIFY_(Prefix)) > 1, \
+ "test-suit-prefix must not be empty"); \
+ static bool gtest_##Prefix##_##SuiteName GTEST_ATTRIBUTE_UNUSED_ = \
+ ::testing::internal::TypeParameterizedTestSuite< \
+ SuiteName, GTEST_SUITE_NAMESPACE_(SuiteName)::gtest_AllTests_, \
+ ::testing::internal::GenerateTypeList<Types>::type>:: \
+ Register(GTEST_STRINGIFY_(Prefix), \
+ ::testing::internal::CodeLocation(__FILE__, __LINE__), \
+ &GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName), \
+ GTEST_STRINGIFY_(SuiteName), \
+ GTEST_REGISTERED_TEST_NAMES_(SuiteName), \
+ ::testing::internal::GenerateNames< \
+ ::testing::internal::NameGeneratorSelector< \
+ __VA_ARGS__>::type, \
+ ::testing::internal::GenerateTypeList<Types>::type>())
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define INSTANTIATE_TYPED_TEST_CASE_P \
+ static_assert( \
+ ::testing::internal::InstantiateTypedTestCase_P_IsDeprecated(), ""); \
+ INSTANTIATE_TYPED_TEST_SUITE_P
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest.h
new file mode 100644
index 0000000000..d19a587a18
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest.h
@@ -0,0 +1,2297 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines the public API for Google Test. It should be
+// included by any test program that uses Google Test.
+//
+// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
+// leave some internal implementation details in this header file.
+// They are clearly marked by comments like this:
+//
+// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+//
+// Such code is NOT meant to be used by a user directly, and is subject
+// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
+// program!
+//
+// Acknowledgment: Google Test borrowed the idea of automatic test
+// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
+// easyUnit framework.
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_H_
+
+#include <cstddef>
+#include <limits>
+#include <memory>
+#include <ostream>
+#include <type_traits>
+#include <vector>
+
+#include "gtest/gtest-assertion-result.h"
+#include "gtest/gtest-death-test.h"
+#include "gtest/gtest-matchers.h"
+#include "gtest/gtest-message.h"
+#include "gtest/gtest-param-test.h"
+#include "gtest/gtest-printers.h"
+#include "gtest/gtest-test-part.h"
+#include "gtest/gtest-typed-test.h"
+#include "gtest/gtest_pred_impl.h"
+#include "gtest/gtest_prod.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-string.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// Declares the flags.
+
+// This flag temporary enables the disabled tests.
+GTEST_DECLARE_bool_(also_run_disabled_tests);
+
+// This flag brings the debugger on an assertion failure.
+GTEST_DECLARE_bool_(break_on_failure);
+
+// This flag controls whether Google Test catches all test-thrown exceptions
+// and logs them as failures.
+GTEST_DECLARE_bool_(catch_exceptions);
+
+// This flag enables using colors in terminal output. Available values are
+// "yes" to enable colors, "no" (disable colors), or "auto" (the default)
+// to let Google Test decide.
+GTEST_DECLARE_string_(color);
+
+// This flag controls whether the test runner should continue execution past
+// first failure.
+GTEST_DECLARE_bool_(fail_fast);
+
+// This flag sets up the filter to select by name using a glob pattern
+// the tests to run. If the filter is not given all tests are executed.
+GTEST_DECLARE_string_(filter);
+
+// This flag controls whether Google Test installs a signal handler that dumps
+// debugging information when fatal signals are raised.
+GTEST_DECLARE_bool_(install_failure_signal_handler);
+
+// This flag causes the Google Test to list tests. None of the tests listed
+// are actually run if the flag is provided.
+GTEST_DECLARE_bool_(list_tests);
+
+// This flag controls whether Google Test emits a detailed XML report to a file
+// in addition to its normal textual output.
+GTEST_DECLARE_string_(output);
+
+// This flags control whether Google Test prints only test failures.
+GTEST_DECLARE_bool_(brief);
+
+// This flags control whether Google Test prints the elapsed time for each
+// test.
+GTEST_DECLARE_bool_(print_time);
+
+// This flags control whether Google Test prints UTF8 characters as text.
+GTEST_DECLARE_bool_(print_utf8);
+
+// This flag specifies the random number seed.
+GTEST_DECLARE_int32_(random_seed);
+
+// This flag sets how many times the tests are repeated. The default value
+// is 1. If the value is -1 the tests are repeating forever.
+GTEST_DECLARE_int32_(repeat);
+
+// This flag controls whether Google Test Environments are recreated for each
+// repeat of the tests. The default value is true. If set to false the global
+// test Environment objects are only set up once, for the first iteration, and
+// only torn down once, for the last.
+GTEST_DECLARE_bool_(recreate_environments_when_repeating);
+
+// This flag controls whether Google Test includes Google Test internal
+// stack frames in failure stack traces.
+GTEST_DECLARE_bool_(show_internal_stack_frames);
+
+// When this flag is specified, tests' order is randomized on every iteration.
+GTEST_DECLARE_bool_(shuffle);
+
+// This flag specifies the maximum number of stack frames to be
+// printed in a failure message.
+GTEST_DECLARE_int32_(stack_trace_depth);
+
+// When this flag is specified, a failed assertion will throw an
+// exception if exceptions are enabled, or exit the program with a
+// non-zero code otherwise. For use with an external test framework.
+GTEST_DECLARE_bool_(throw_on_failure);
+
+// When this flag is set with a "host:port" string, on supported
+// platforms test results are streamed to the specified port on
+// the specified host machine.
+GTEST_DECLARE_string_(stream_result_to);
+
+#if GTEST_USE_OWN_FLAGFILE_FLAG_
+GTEST_DECLARE_string_(flagfile);
+#endif // GTEST_USE_OWN_FLAGFILE_FLAG_
+
+namespace testing {
+
+// Silence C4100 (unreferenced formal parameter) and 4805
+// unsafe mix of type 'const int' and type 'const bool'
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4805)
+#pragma warning(disable : 4100)
+#endif
+
+// The upper limit for valid stack trace depths.
+const int kMaxStackTraceDepth = 100;
+
+namespace internal {
+
+class AssertHelper;
+class DefaultGlobalTestPartResultReporter;
+class ExecDeathTest;
+class NoExecDeathTest;
+class FinalSuccessChecker;
+class GTestFlagSaver;
+class StreamingListenerTest;
+class TestResultAccessor;
+class TestEventListenersAccessor;
+class TestEventRepeater;
+class UnitTestRecordPropertyTestHelper;
+class WindowsDeathTest;
+class FuchsiaDeathTest;
+class UnitTestImpl* GetUnitTestImpl();
+void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
+ const std::string& message);
+std::set<std::string>* GetIgnoredParameterizedTestSuites();
+
+} // namespace internal
+
+// The friend relationship of some of these classes is cyclic.
+// If we don't forward declare them the compiler might confuse the classes
+// in friendship clauses with same named classes on the scope.
+class Test;
+class TestSuite;
+
+// Old API is still available but deprecated
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+using TestCase = TestSuite;
+#endif
+class TestInfo;
+class UnitTest;
+
+// The abstract class that all tests inherit from.
+//
+// In Google Test, a unit test program contains one or many TestSuites, and
+// each TestSuite contains one or many Tests.
+//
+// When you define a test using the TEST macro, you don't need to
+// explicitly derive from Test - the TEST macro automatically does
+// this for you.
+//
+// The only time you derive from Test is when defining a test fixture
+// to be used in a TEST_F. For example:
+//
+// class FooTest : public testing::Test {
+// protected:
+// void SetUp() override { ... }
+// void TearDown() override { ... }
+// ...
+// };
+//
+// TEST_F(FooTest, Bar) { ... }
+// TEST_F(FooTest, Baz) { ... }
+//
+// Test is not copyable.
+class GTEST_API_ Test {
+ public:
+ friend class TestInfo;
+
+ // The d'tor is virtual as we intend to inherit from Test.
+ virtual ~Test();
+
+ // Sets up the stuff shared by all tests in this test suite.
+ //
+ // Google Test will call Foo::SetUpTestSuite() before running the first
+ // test in test suite Foo. Hence a sub-class can define its own
+ // SetUpTestSuite() method to shadow the one defined in the super
+ // class.
+ static void SetUpTestSuite() {}
+
+ // Tears down the stuff shared by all tests in this test suite.
+ //
+ // Google Test will call Foo::TearDownTestSuite() after running the last
+ // test in test suite Foo. Hence a sub-class can define its own
+ // TearDownTestSuite() method to shadow the one defined in the super
+ // class.
+ static void TearDownTestSuite() {}
+
+ // Legacy API is deprecated but still available. Use SetUpTestSuite and
+ // TearDownTestSuite instead.
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ static void TearDownTestCase() {}
+ static void SetUpTestCase() {}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ // Returns true if and only if the current test has a fatal failure.
+ static bool HasFatalFailure();
+
+ // Returns true if and only if the current test has a non-fatal failure.
+ static bool HasNonfatalFailure();
+
+ // Returns true if and only if the current test was skipped.
+ static bool IsSkipped();
+
+ // Returns true if and only if the current test has a (either fatal or
+ // non-fatal) failure.
+ static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); }
+
+ // Logs a property for the current test, test suite, or for the entire
+ // invocation of the test program when used outside of the context of a
+ // test suite. Only the last value for a given key is remembered. These
+ // are public static so they can be called from utility functions that are
+ // not members of the test fixture. Calls to RecordProperty made during
+ // lifespan of the test (from the moment its constructor starts to the
+ // moment its destructor finishes) will be output in XML as attributes of
+ // the <testcase> element. Properties recorded from fixture's
+ // SetUpTestSuite or TearDownTestSuite are logged as attributes of the
+ // corresponding <testsuite> element. Calls to RecordProperty made in the
+ // global context (before or after invocation of RUN_ALL_TESTS and from
+ // SetUp/TearDown method of Environment objects registered with Google
+ // Test) will be output as attributes of the <testsuites> element.
+ static void RecordProperty(const std::string& key, const std::string& value);
+ static void RecordProperty(const std::string& key, int value);
+
+ protected:
+ // Creates a Test object.
+ Test();
+
+ // Sets up the test fixture.
+ virtual void SetUp();
+
+ // Tears down the test fixture.
+ virtual void TearDown();
+
+ private:
+ // Returns true if and only if the current test has the same fixture class
+ // as the first test in the current test suite.
+ static bool HasSameFixtureClass();
+
+ // Runs the test after the test fixture has been set up.
+ //
+ // A sub-class must implement this to define the test logic.
+ //
+ // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM.
+ // Instead, use the TEST or TEST_F macro.
+ virtual void TestBody() = 0;
+
+ // Sets up, executes, and tears down the test.
+ void Run();
+
+ // Deletes self. We deliberately pick an unusual name for this
+ // internal method to avoid clashing with names used in user TESTs.
+ void DeleteSelf_() { delete this; }
+
+ const std::unique_ptr<GTEST_FLAG_SAVER_> gtest_flag_saver_;
+
+ // Often a user misspells SetUp() as Setup() and spends a long time
+ // wondering why it is never called by Google Test. The declaration of
+ // the following method is solely for catching such an error at
+ // compile time:
+ //
+ // - The return type is deliberately chosen to be not void, so it
+ // will be a conflict if void Setup() is declared in the user's
+ // test fixture.
+ //
+ // - This method is private, so it will be another compiler error
+ // if the method is called from the user's test fixture.
+ //
+ // DO NOT OVERRIDE THIS FUNCTION.
+ //
+ // If you see an error about overriding the following function or
+ // about it being private, you have mis-spelled SetUp() as Setup().
+ struct Setup_should_be_spelled_SetUp {};
+ virtual Setup_should_be_spelled_SetUp* Setup() { return nullptr; }
+
+ // We disallow copying Tests.
+ Test(const Test&) = delete;
+ Test& operator=(const Test&) = delete;
+};
+
+typedef internal::TimeInMillis TimeInMillis;
+
+// A copyable object representing a user specified test property which can be
+// output as a key/value string pair.
+//
+// Don't inherit from TestProperty as its destructor is not virtual.
+class TestProperty {
+ public:
+ // C'tor. TestProperty does NOT have a default constructor.
+ // Always use this constructor (with parameters) to create a
+ // TestProperty object.
+ TestProperty(const std::string& a_key, const std::string& a_value)
+ : key_(a_key), value_(a_value) {}
+
+ // Gets the user supplied key.
+ const char* key() const { return key_.c_str(); }
+
+ // Gets the user supplied value.
+ const char* value() const { return value_.c_str(); }
+
+ // Sets a new value, overriding the one supplied in the constructor.
+ void SetValue(const std::string& new_value) { value_ = new_value; }
+
+ private:
+ // The key supplied by the user.
+ std::string key_;
+ // The value supplied by the user.
+ std::string value_;
+};
+
+// The result of a single Test. This includes a list of
+// TestPartResults, a list of TestProperties, a count of how many
+// death tests there are in the Test, and how much time it took to run
+// the Test.
+//
+// TestResult is not copyable.
+class GTEST_API_ TestResult {
+ public:
+ // Creates an empty TestResult.
+ TestResult();
+
+ // D'tor. Do not inherit from TestResult.
+ ~TestResult();
+
+ // Gets the number of all test parts. This is the sum of the number
+ // of successful test parts and the number of failed test parts.
+ int total_part_count() const;
+
+ // Returns the number of the test properties.
+ int test_property_count() const;
+
+ // Returns true if and only if the test passed (i.e. no test part failed).
+ bool Passed() const { return !Skipped() && !Failed(); }
+
+ // Returns true if and only if the test was skipped.
+ bool Skipped() const;
+
+ // Returns true if and only if the test failed.
+ bool Failed() const;
+
+ // Returns true if and only if the test fatally failed.
+ bool HasFatalFailure() const;
+
+ // Returns true if and only if the test has a non-fatal failure.
+ bool HasNonfatalFailure() const;
+
+ // Returns the elapsed time, in milliseconds.
+ TimeInMillis elapsed_time() const { return elapsed_time_; }
+
+ // Gets the time of the test case start, in ms from the start of the
+ // UNIX epoch.
+ TimeInMillis start_timestamp() const { return start_timestamp_; }
+
+ // Returns the i-th test part result among all the results. i can range from 0
+ // to total_part_count() - 1. If i is not in that range, aborts the program.
+ const TestPartResult& GetTestPartResult(int i) const;
+
+ // Returns the i-th test property. i can range from 0 to
+ // test_property_count() - 1. If i is not in that range, aborts the
+ // program.
+ const TestProperty& GetTestProperty(int i) const;
+
+ private:
+ friend class TestInfo;
+ friend class TestSuite;
+ friend class UnitTest;
+ friend class internal::DefaultGlobalTestPartResultReporter;
+ friend class internal::ExecDeathTest;
+ friend class internal::TestResultAccessor;
+ friend class internal::UnitTestImpl;
+ friend class internal::WindowsDeathTest;
+ friend class internal::FuchsiaDeathTest;
+
+ // Gets the vector of TestPartResults.
+ const std::vector<TestPartResult>& test_part_results() const {
+ return test_part_results_;
+ }
+
+ // Gets the vector of TestProperties.
+ const std::vector<TestProperty>& test_properties() const {
+ return test_properties_;
+ }
+
+ // Sets the start time.
+ void set_start_timestamp(TimeInMillis start) { start_timestamp_ = start; }
+
+ // Sets the elapsed time.
+ void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; }
+
+ // Adds a test property to the list. The property is validated and may add
+ // a non-fatal failure if invalid (e.g., if it conflicts with reserved
+ // key names). If a property is already recorded for the same key, the
+ // value will be updated, rather than storing multiple values for the same
+ // key. xml_element specifies the element for which the property is being
+ // recorded and is used for validation.
+ void RecordProperty(const std::string& xml_element,
+ const TestProperty& test_property);
+
+ // Adds a failure if the key is a reserved attribute of Google Test
+ // testsuite tags. Returns true if the property is valid.
+ // FIXME: Validate attribute names are legal and human readable.
+ static bool ValidateTestProperty(const std::string& xml_element,
+ const TestProperty& test_property);
+
+ // Adds a test part result to the list.
+ void AddTestPartResult(const TestPartResult& test_part_result);
+
+ // Returns the death test count.
+ int death_test_count() const { return death_test_count_; }
+
+ // Increments the death test count, returning the new count.
+ int increment_death_test_count() { return ++death_test_count_; }
+
+ // Clears the test part results.
+ void ClearTestPartResults();
+
+ // Clears the object.
+ void Clear();
+
+ // Protects mutable state of the property vector and of owned
+ // properties, whose values may be updated.
+ internal::Mutex test_properties_mutex_;
+
+ // The vector of TestPartResults
+ std::vector<TestPartResult> test_part_results_;
+ // The vector of TestProperties
+ std::vector<TestProperty> test_properties_;
+ // Running count of death tests.
+ int death_test_count_;
+ // The start time, in milliseconds since UNIX Epoch.
+ TimeInMillis start_timestamp_;
+ // The elapsed time, in milliseconds.
+ TimeInMillis elapsed_time_;
+
+ // We disallow copying TestResult.
+ TestResult(const TestResult&) = delete;
+ TestResult& operator=(const TestResult&) = delete;
+}; // class TestResult
+
+// A TestInfo object stores the following information about a test:
+//
+// Test suite name
+// Test name
+// Whether the test should be run
+// A function pointer that creates the test object when invoked
+// Test result
+//
+// The constructor of TestInfo registers itself with the UnitTest
+// singleton such that the RUN_ALL_TESTS() macro knows which tests to
+// run.
+class GTEST_API_ TestInfo {
+ public:
+ // Destructs a TestInfo object. This function is not virtual, so
+ // don't inherit from TestInfo.
+ ~TestInfo();
+
+ // Returns the test suite name.
+ const char* test_suite_name() const { return test_suite_name_.c_str(); }
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ const char* test_case_name() const { return test_suite_name(); }
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ // Returns the test name.
+ const char* name() const { return name_.c_str(); }
+
+ // Returns the name of the parameter type, or NULL if this is not a typed
+ // or a type-parameterized test.
+ const char* type_param() const {
+ if (type_param_.get() != nullptr) return type_param_->c_str();
+ return nullptr;
+ }
+
+ // Returns the text representation of the value parameter, or NULL if this
+ // is not a value-parameterized test.
+ const char* value_param() const {
+ if (value_param_.get() != nullptr) return value_param_->c_str();
+ return nullptr;
+ }
+
+ // Returns the file name where this test is defined.
+ const char* file() const { return location_.file.c_str(); }
+
+ // Returns the line where this test is defined.
+ int line() const { return location_.line; }
+
+ // Return true if this test should not be run because it's in another shard.
+ bool is_in_another_shard() const { return is_in_another_shard_; }
+
+ // Returns true if this test should run, that is if the test is not
+ // disabled (or it is disabled but the also_run_disabled_tests flag has
+ // been specified) and its full name matches the user-specified filter.
+ //
+ // Google Test allows the user to filter the tests by their full names.
+ // The full name of a test Bar in test suite Foo is defined as
+ // "Foo.Bar". Only the tests that match the filter will run.
+ //
+ // A filter is a colon-separated list of glob (not regex) patterns,
+ // optionally followed by a '-' and a colon-separated list of
+ // negative patterns (tests to exclude). A test is run if it
+ // matches one of the positive patterns and does not match any of
+ // the negative patterns.
+ //
+ // For example, *A*:Foo.* is a filter that matches any string that
+ // contains the character 'A' or starts with "Foo.".
+ bool should_run() const { return should_run_; }
+
+ // Returns true if and only if this test will appear in the XML report.
+ bool is_reportable() const {
+ // The XML report includes tests matching the filter, excluding those
+ // run in other shards.
+ return matches_filter_ && !is_in_another_shard_;
+ }
+
+ // Returns the result of the test.
+ const TestResult* result() const { return &result_; }
+
+ private:
+#if GTEST_HAS_DEATH_TEST
+ friend class internal::DefaultDeathTestFactory;
+#endif // GTEST_HAS_DEATH_TEST
+ friend class Test;
+ friend class TestSuite;
+ friend class internal::UnitTestImpl;
+ friend class internal::StreamingListenerTest;
+ friend TestInfo* internal::MakeAndRegisterTestInfo(
+ const char* test_suite_name, const char* name, const char* type_param,
+ const char* value_param, internal::CodeLocation code_location,
+ internal::TypeId fixture_class_id, internal::SetUpTestSuiteFunc set_up_tc,
+ internal::TearDownTestSuiteFunc tear_down_tc,
+ internal::TestFactoryBase* factory);
+
+ // Constructs a TestInfo object. The newly constructed instance assumes
+ // ownership of the factory object.
+ TestInfo(const std::string& test_suite_name, const std::string& name,
+ const char* a_type_param, // NULL if not a type-parameterized test
+ const char* a_value_param, // NULL if not a value-parameterized test
+ internal::CodeLocation a_code_location,
+ internal::TypeId fixture_class_id,
+ internal::TestFactoryBase* factory);
+
+ // Increments the number of death tests encountered in this test so
+ // far.
+ int increment_death_test_count() {
+ return result_.increment_death_test_count();
+ }
+
+ // Creates the test object, runs it, records its result, and then
+ // deletes it.
+ void Run();
+
+ // Skip and records the test result for this object.
+ void Skip();
+
+ static void ClearTestResult(TestInfo* test_info) {
+ test_info->result_.Clear();
+ }
+
+ // These fields are immutable properties of the test.
+ const std::string test_suite_name_; // test suite name
+ const std::string name_; // Test name
+ // Name of the parameter type, or NULL if this is not a typed or a
+ // type-parameterized test.
+ const std::unique_ptr<const ::std::string> type_param_;
+ // Text representation of the value parameter, or NULL if this is not a
+ // value-parameterized test.
+ const std::unique_ptr<const ::std::string> value_param_;
+ internal::CodeLocation location_;
+ const internal::TypeId fixture_class_id_; // ID of the test fixture class
+ bool should_run_; // True if and only if this test should run
+ bool is_disabled_; // True if and only if this test is disabled
+ bool matches_filter_; // True if this test matches the
+ // user-specified filter.
+ bool is_in_another_shard_; // Will be run in another shard.
+ internal::TestFactoryBase* const factory_; // The factory that creates
+ // the test object
+
+ // This field is mutable and needs to be reset before running the
+ // test for the second time.
+ TestResult result_;
+
+ TestInfo(const TestInfo&) = delete;
+ TestInfo& operator=(const TestInfo&) = delete;
+};
+
+// A test suite, which consists of a vector of TestInfos.
+//
+// TestSuite is not copyable.
+class GTEST_API_ TestSuite {
+ public:
+ // Creates a TestSuite with the given name.
+ //
+ // TestSuite does NOT have a default constructor. Always use this
+ // constructor to create a TestSuite object.
+ //
+ // Arguments:
+ //
+ // name: name of the test suite
+ // a_type_param: the name of the test's type parameter, or NULL if
+ // this is not a type-parameterized test.
+ // set_up_tc: pointer to the function that sets up the test suite
+ // tear_down_tc: pointer to the function that tears down the test suite
+ TestSuite(const char* name, const char* a_type_param,
+ internal::SetUpTestSuiteFunc set_up_tc,
+ internal::TearDownTestSuiteFunc tear_down_tc);
+
+ // Destructor of TestSuite.
+ virtual ~TestSuite();
+
+ // Gets the name of the TestSuite.
+ const char* name() const { return name_.c_str(); }
+
+ // Returns the name of the parameter type, or NULL if this is not a
+ // type-parameterized test suite.
+ const char* type_param() const {
+ if (type_param_.get() != nullptr) return type_param_->c_str();
+ return nullptr;
+ }
+
+ // Returns true if any test in this test suite should run.
+ bool should_run() const { return should_run_; }
+
+ // Gets the number of successful tests in this test suite.
+ int successful_test_count() const;
+
+ // Gets the number of skipped tests in this test suite.
+ int skipped_test_count() const;
+
+ // Gets the number of failed tests in this test suite.
+ int failed_test_count() const;
+
+ // Gets the number of disabled tests that will be reported in the XML report.
+ int reportable_disabled_test_count() const;
+
+ // Gets the number of disabled tests in this test suite.
+ int disabled_test_count() const;
+
+ // Gets the number of tests to be printed in the XML report.
+ int reportable_test_count() const;
+
+ // Get the number of tests in this test suite that should run.
+ int test_to_run_count() const;
+
+ // Gets the number of all tests in this test suite.
+ int total_test_count() const;
+
+ // Returns true if and only if the test suite passed.
+ bool Passed() const { return !Failed(); }
+
+ // Returns true if and only if the test suite failed.
+ bool Failed() const {
+ return failed_test_count() > 0 || ad_hoc_test_result().Failed();
+ }
+
+ // Returns the elapsed time, in milliseconds.
+ TimeInMillis elapsed_time() const { return elapsed_time_; }
+
+ // Gets the time of the test suite start, in ms from the start of the
+ // UNIX epoch.
+ TimeInMillis start_timestamp() const { return start_timestamp_; }
+
+ // Returns the i-th test among all the tests. i can range from 0 to
+ // total_test_count() - 1. If i is not in that range, returns NULL.
+ const TestInfo* GetTestInfo(int i) const;
+
+ // Returns the TestResult that holds test properties recorded during
+ // execution of SetUpTestSuite and TearDownTestSuite.
+ const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; }
+
+ private:
+ friend class Test;
+ friend class internal::UnitTestImpl;
+
+ // Gets the (mutable) vector of TestInfos in this TestSuite.
+ std::vector<TestInfo*>& test_info_list() { return test_info_list_; }
+
+ // Gets the (immutable) vector of TestInfos in this TestSuite.
+ const std::vector<TestInfo*>& test_info_list() const {
+ return test_info_list_;
+ }
+
+ // Returns the i-th test among all the tests. i can range from 0 to
+ // total_test_count() - 1. If i is not in that range, returns NULL.
+ TestInfo* GetMutableTestInfo(int i);
+
+ // Sets the should_run member.
+ void set_should_run(bool should) { should_run_ = should; }
+
+ // Adds a TestInfo to this test suite. Will delete the TestInfo upon
+ // destruction of the TestSuite object.
+ void AddTestInfo(TestInfo* test_info);
+
+ // Clears the results of all tests in this test suite.
+ void ClearResult();
+
+ // Clears the results of all tests in the given test suite.
+ static void ClearTestSuiteResult(TestSuite* test_suite) {
+ test_suite->ClearResult();
+ }
+
+ // Runs every test in this TestSuite.
+ void Run();
+
+ // Skips the execution of tests under this TestSuite
+ void Skip();
+
+ // Runs SetUpTestSuite() for this TestSuite. This wrapper is needed
+ // for catching exceptions thrown from SetUpTestSuite().
+ void RunSetUpTestSuite() {
+ if (set_up_tc_ != nullptr) {
+ (*set_up_tc_)();
+ }
+ }
+
+ // Runs TearDownTestSuite() for this TestSuite. This wrapper is
+ // needed for catching exceptions thrown from TearDownTestSuite().
+ void RunTearDownTestSuite() {
+ if (tear_down_tc_ != nullptr) {
+ (*tear_down_tc_)();
+ }
+ }
+
+ // Returns true if and only if test passed.
+ static bool TestPassed(const TestInfo* test_info) {
+ return test_info->should_run() && test_info->result()->Passed();
+ }
+
+ // Returns true if and only if test skipped.
+ static bool TestSkipped(const TestInfo* test_info) {
+ return test_info->should_run() && test_info->result()->Skipped();
+ }
+
+ // Returns true if and only if test failed.
+ static bool TestFailed(const TestInfo* test_info) {
+ return test_info->should_run() && test_info->result()->Failed();
+ }
+
+ // Returns true if and only if the test is disabled and will be reported in
+ // the XML report.
+ static bool TestReportableDisabled(const TestInfo* test_info) {
+ return test_info->is_reportable() && test_info->is_disabled_;
+ }
+
+ // Returns true if and only if test is disabled.
+ static bool TestDisabled(const TestInfo* test_info) {
+ return test_info->is_disabled_;
+ }
+
+ // Returns true if and only if this test will appear in the XML report.
+ static bool TestReportable(const TestInfo* test_info) {
+ return test_info->is_reportable();
+ }
+
+ // Returns true if the given test should run.
+ static bool ShouldRunTest(const TestInfo* test_info) {
+ return test_info->should_run();
+ }
+
+ // Shuffles the tests in this test suite.
+ void ShuffleTests(internal::Random* random);
+
+ // Restores the test order to before the first shuffle.
+ void UnshuffleTests();
+
+ // Name of the test suite.
+ std::string name_;
+ // Name of the parameter type, or NULL if this is not a typed or a
+ // type-parameterized test.
+ const std::unique_ptr<const ::std::string> type_param_;
+ // The vector of TestInfos in their original order. It owns the
+ // elements in the vector.
+ std::vector<TestInfo*> test_info_list_;
+ // Provides a level of indirection for the test list to allow easy
+ // shuffling and restoring the test order. The i-th element in this
+ // vector is the index of the i-th test in the shuffled test list.
+ std::vector<int> test_indices_;
+ // Pointer to the function that sets up the test suite.
+ internal::SetUpTestSuiteFunc set_up_tc_;
+ // Pointer to the function that tears down the test suite.
+ internal::TearDownTestSuiteFunc tear_down_tc_;
+ // True if and only if any test in this test suite should run.
+ bool should_run_;
+ // The start time, in milliseconds since UNIX Epoch.
+ TimeInMillis start_timestamp_;
+ // Elapsed time, in milliseconds.
+ TimeInMillis elapsed_time_;
+ // Holds test properties recorded during execution of SetUpTestSuite and
+ // TearDownTestSuite.
+ TestResult ad_hoc_test_result_;
+
+ // We disallow copying TestSuites.
+ TestSuite(const TestSuite&) = delete;
+ TestSuite& operator=(const TestSuite&) = delete;
+};
+
+// An Environment object is capable of setting up and tearing down an
+// environment. You should subclass this to define your own
+// environment(s).
+//
+// An Environment object does the set-up and tear-down in virtual
+// methods SetUp() and TearDown() instead of the constructor and the
+// destructor, as:
+//
+// 1. You cannot safely throw from a destructor. This is a problem
+// as in some cases Google Test is used where exceptions are enabled, and
+// we may want to implement ASSERT_* using exceptions where they are
+// available.
+// 2. You cannot use ASSERT_* directly in a constructor or
+// destructor.
+class Environment {
+ public:
+ // The d'tor is virtual as we need to subclass Environment.
+ virtual ~Environment() {}
+
+ // Override this to define how to set up the environment.
+ virtual void SetUp() {}
+
+ // Override this to define how to tear down the environment.
+ virtual void TearDown() {}
+
+ private:
+ // If you see an error about overriding the following function or
+ // about it being private, you have mis-spelled SetUp() as Setup().
+ struct Setup_should_be_spelled_SetUp {};
+ virtual Setup_should_be_spelled_SetUp* Setup() { return nullptr; }
+};
+
+#if GTEST_HAS_EXCEPTIONS
+
+// Exception which can be thrown from TestEventListener::OnTestPartResult.
+class GTEST_API_ AssertionException
+ : public internal::GoogleTestFailureException {
+ public:
+ explicit AssertionException(const TestPartResult& result)
+ : GoogleTestFailureException(result) {}
+};
+
+#endif // GTEST_HAS_EXCEPTIONS
+
+// The interface for tracing execution of tests. The methods are organized in
+// the order the corresponding events are fired.
+class TestEventListener {
+ public:
+ virtual ~TestEventListener() {}
+
+ // Fired before any test activity starts.
+ virtual void OnTestProgramStart(const UnitTest& unit_test) = 0;
+
+ // Fired before each iteration of tests starts. There may be more than
+ // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration
+ // index, starting from 0.
+ virtual void OnTestIterationStart(const UnitTest& unit_test,
+ int iteration) = 0;
+
+ // Fired before environment set-up for each iteration of tests starts.
+ virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0;
+
+ // Fired after environment set-up for each iteration of tests ends.
+ virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0;
+
+ // Fired before the test suite starts.
+ virtual void OnTestSuiteStart(const TestSuite& /*test_suite*/) {}
+
+ // Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ virtual void OnTestCaseStart(const TestCase& /*test_case*/) {}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ // Fired before the test starts.
+ virtual void OnTestStart(const TestInfo& test_info) = 0;
+
+ // Fired when a test is disabled
+ virtual void OnTestDisabled(const TestInfo& /*test_info*/) {}
+
+ // Fired after a failed assertion or a SUCCEED() invocation.
+ // If you want to throw an exception from this function to skip to the next
+ // TEST, it must be AssertionException defined above, or inherited from it.
+ virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0;
+
+ // Fired after the test ends.
+ virtual void OnTestEnd(const TestInfo& test_info) = 0;
+
+ // Fired after the test suite ends.
+ virtual void OnTestSuiteEnd(const TestSuite& /*test_suite*/) {}
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ // Fired before environment tear-down for each iteration of tests starts.
+ virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0;
+
+ // Fired after environment tear-down for each iteration of tests ends.
+ virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0;
+
+ // Fired after each iteration of tests finishes.
+ virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration) = 0;
+
+ // Fired after all test activities have ended.
+ virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0;
+};
+
+// The convenience class for users who need to override just one or two
+// methods and are not concerned that a possible change to a signature of
+// the methods they override will not be caught during the build. For
+// comments about each method please see the definition of TestEventListener
+// above.
+class EmptyTestEventListener : public TestEventListener {
+ public:
+ void OnTestProgramStart(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationStart(const UnitTest& /*unit_test*/,
+ int /*iteration*/) override {}
+ void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) override {}
+ void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
+ void OnTestSuiteStart(const TestSuite& /*test_suite*/) override {}
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestCaseStart(const TestCase& /*test_case*/) override {}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ void OnTestStart(const TestInfo& /*test_info*/) override {}
+ void OnTestDisabled(const TestInfo& /*test_info*/) override {}
+ void OnTestPartResult(const TestPartResult& /*test_part_result*/) override {}
+ void OnTestEnd(const TestInfo& /*test_info*/) override {}
+ void OnTestSuiteEnd(const TestSuite& /*test_suite*/) override {}
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestCaseEnd(const TestCase& /*test_case*/) override {}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) override {}
+ void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationEnd(const UnitTest& /*unit_test*/,
+ int /*iteration*/) override {}
+ void OnTestProgramEnd(const UnitTest& /*unit_test*/) override {}
+};
+
+// TestEventListeners lets users add listeners to track events in Google Test.
+class GTEST_API_ TestEventListeners {
+ public:
+ TestEventListeners();
+ ~TestEventListeners();
+
+ // Appends an event listener to the end of the list. Google Test assumes
+ // the ownership of the listener (i.e. it will delete the listener when
+ // the test program finishes).
+ void Append(TestEventListener* listener);
+
+ // Removes the given event listener from the list and returns it. It then
+ // becomes the caller's responsibility to delete the listener. Returns
+ // NULL if the listener is not found in the list.
+ TestEventListener* Release(TestEventListener* listener);
+
+ // Returns the standard listener responsible for the default console
+ // output. Can be removed from the listeners list to shut down default
+ // console output. Note that removing this object from the listener list
+ // with Release transfers its ownership to the caller and makes this
+ // function return NULL the next time.
+ TestEventListener* default_result_printer() const {
+ return default_result_printer_;
+ }
+
+ // Returns the standard listener responsible for the default XML output
+ // controlled by the --gtest_output=xml flag. Can be removed from the
+ // listeners list by users who want to shut down the default XML output
+ // controlled by this flag and substitute it with custom one. Note that
+ // removing this object from the listener list with Release transfers its
+ // ownership to the caller and makes this function return NULL the next
+ // time.
+ TestEventListener* default_xml_generator() const {
+ return default_xml_generator_;
+ }
+
+ private:
+ friend class TestSuite;
+ friend class TestInfo;
+ friend class internal::DefaultGlobalTestPartResultReporter;
+ friend class internal::NoExecDeathTest;
+ friend class internal::TestEventListenersAccessor;
+ friend class internal::UnitTestImpl;
+
+ // Returns repeater that broadcasts the TestEventListener events to all
+ // subscribers.
+ TestEventListener* repeater();
+
+ // Sets the default_result_printer attribute to the provided listener.
+ // The listener is also added to the listener list and previous
+ // default_result_printer is removed from it and deleted. The listener can
+ // also be NULL in which case it will not be added to the list. Does
+ // nothing if the previous and the current listener objects are the same.
+ void SetDefaultResultPrinter(TestEventListener* listener);
+
+ // Sets the default_xml_generator attribute to the provided listener. The
+ // listener is also added to the listener list and previous
+ // default_xml_generator is removed from it and deleted. The listener can
+ // also be NULL in which case it will not be added to the list. Does
+ // nothing if the previous and the current listener objects are the same.
+ void SetDefaultXmlGenerator(TestEventListener* listener);
+
+ // Controls whether events will be forwarded by the repeater to the
+ // listeners in the list.
+ bool EventForwardingEnabled() const;
+ void SuppressEventForwarding();
+
+ // The actual list of listeners.
+ internal::TestEventRepeater* repeater_;
+ // Listener responsible for the standard result output.
+ TestEventListener* default_result_printer_;
+ // Listener responsible for the creation of the XML output file.
+ TestEventListener* default_xml_generator_;
+
+ // We disallow copying TestEventListeners.
+ TestEventListeners(const TestEventListeners&) = delete;
+ TestEventListeners& operator=(const TestEventListeners&) = delete;
+};
+
+// A UnitTest consists of a vector of TestSuites.
+//
+// This is a singleton class. The only instance of UnitTest is
+// created when UnitTest::GetInstance() is first called. This
+// instance is never deleted.
+//
+// UnitTest is not copyable.
+//
+// This class is thread-safe as long as the methods are called
+// according to their specification.
+class GTEST_API_ UnitTest {
+ public:
+ // Gets the singleton UnitTest object. The first time this method
+ // is called, a UnitTest object is constructed and returned.
+ // Consecutive calls will return the same object.
+ static UnitTest* GetInstance();
+
+ // Runs all tests in this UnitTest object and prints the result.
+ // Returns 0 if successful, or 1 otherwise.
+ //
+ // This method can only be called from the main thread.
+ //
+ // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+ int Run() GTEST_MUST_USE_RESULT_;
+
+ // Returns the working directory when the first TEST() or TEST_F()
+ // was executed. The UnitTest object owns the string.
+ const char* original_working_dir() const;
+
+ // Returns the TestSuite object for the test that's currently running,
+ // or NULL if no test is running.
+ const TestSuite* current_test_suite() const GTEST_LOCK_EXCLUDED_(mutex_);
+
+// Legacy API is still available but deprecated
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ const TestCase* current_test_case() const GTEST_LOCK_EXCLUDED_(mutex_);
+#endif
+
+ // Returns the TestInfo object for the test that's currently running,
+ // or NULL if no test is running.
+ const TestInfo* current_test_info() const GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // Returns the random seed used at the start of the current test run.
+ int random_seed() const;
+
+ // Returns the ParameterizedTestSuiteRegistry object used to keep track of
+ // value-parameterized tests and instantiate and register them.
+ //
+ // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+ internal::ParameterizedTestSuiteRegistry& parameterized_test_registry()
+ GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // Gets the number of successful test suites.
+ int successful_test_suite_count() const;
+
+ // Gets the number of failed test suites.
+ int failed_test_suite_count() const;
+
+ // Gets the number of all test suites.
+ int total_test_suite_count() const;
+
+ // Gets the number of all test suites that contain at least one test
+ // that should run.
+ int test_suite_to_run_count() const;
+
+ // Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ int successful_test_case_count() const;
+ int failed_test_case_count() const;
+ int total_test_case_count() const;
+ int test_case_to_run_count() const;
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ // Gets the number of successful tests.
+ int successful_test_count() const;
+
+ // Gets the number of skipped tests.
+ int skipped_test_count() const;
+
+ // Gets the number of failed tests.
+ int failed_test_count() const;
+
+ // Gets the number of disabled tests that will be reported in the XML report.
+ int reportable_disabled_test_count() const;
+
+ // Gets the number of disabled tests.
+ int disabled_test_count() const;
+
+ // Gets the number of tests to be printed in the XML report.
+ int reportable_test_count() const;
+
+ // Gets the number of all tests.
+ int total_test_count() const;
+
+ // Gets the number of tests that should run.
+ int test_to_run_count() const;
+
+ // Gets the time of the test program start, in ms from the start of the
+ // UNIX epoch.
+ TimeInMillis start_timestamp() const;
+
+ // Gets the elapsed time, in milliseconds.
+ TimeInMillis elapsed_time() const;
+
+ // Returns true if and only if the unit test passed (i.e. all test suites
+ // passed).
+ bool Passed() const;
+
+ // Returns true if and only if the unit test failed (i.e. some test suite
+ // failed or something outside of all tests failed).
+ bool Failed() const;
+
+ // Gets the i-th test suite among all the test suites. i can range from 0 to
+ // total_test_suite_count() - 1. If i is not in that range, returns NULL.
+ const TestSuite* GetTestSuite(int i) const;
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ const TestCase* GetTestCase(int i) const;
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ // Returns the TestResult containing information on test failures and
+ // properties logged outside of individual test suites.
+ const TestResult& ad_hoc_test_result() const;
+
+ // Returns the list of event listeners that can be used to track events
+ // inside Google Test.
+ TestEventListeners& listeners();
+
+ private:
+ // Registers and returns a global test environment. When a test
+ // program is run, all global test environments will be set-up in
+ // the order they were registered. After all tests in the program
+ // have finished, all global test environments will be torn-down in
+ // the *reverse* order they were registered.
+ //
+ // The UnitTest object takes ownership of the given environment.
+ //
+ // This method can only be called from the main thread.
+ Environment* AddEnvironment(Environment* env);
+
+ // Adds a TestPartResult to the current TestResult object. All
+ // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc)
+ // eventually call this to report their results. The user code
+ // should use the assertion macros instead of calling this directly.
+ void AddTestPartResult(TestPartResult::Type result_type,
+ const char* file_name, int line_number,
+ const std::string& message,
+ const std::string& os_stack_trace)
+ GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // Adds a TestProperty to the current TestResult object when invoked from
+ // inside a test, to current TestSuite's ad_hoc_test_result_ when invoked
+ // from SetUpTestSuite or TearDownTestSuite, or to the global property set
+ // when invoked elsewhere. If the result already contains a property with
+ // the same key, the value will be updated.
+ void RecordProperty(const std::string& key, const std::string& value);
+
+ // Gets the i-th test suite among all the test suites. i can range from 0 to
+ // total_test_suite_count() - 1. If i is not in that range, returns NULL.
+ TestSuite* GetMutableTestSuite(int i);
+
+ // Accessors for the implementation object.
+ internal::UnitTestImpl* impl() { return impl_; }
+ const internal::UnitTestImpl* impl() const { return impl_; }
+
+ // These classes and functions are friends as they need to access private
+ // members of UnitTest.
+ friend class ScopedTrace;
+ friend class Test;
+ friend class internal::AssertHelper;
+ friend class internal::StreamingListenerTest;
+ friend class internal::UnitTestRecordPropertyTestHelper;
+ friend Environment* AddGlobalTestEnvironment(Environment* env);
+ friend std::set<std::string>* internal::GetIgnoredParameterizedTestSuites();
+ friend internal::UnitTestImpl* internal::GetUnitTestImpl();
+ friend void internal::ReportFailureInUnknownLocation(
+ TestPartResult::Type result_type, const std::string& message);
+
+ // Creates an empty UnitTest.
+ UnitTest();
+
+ // D'tor
+ virtual ~UnitTest();
+
+ // Pushes a trace defined by SCOPED_TRACE() on to the per-thread
+ // Google Test trace stack.
+ void PushGTestTrace(const internal::TraceInfo& trace)
+ GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // Pops a trace from the per-thread Google Test trace stack.
+ void PopGTestTrace() GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // Protects mutable state in *impl_. This is mutable as some const
+ // methods need to lock it too.
+ mutable internal::Mutex mutex_;
+
+ // Opaque implementation object. This field is never changed once
+ // the object is constructed. We don't mark it as const here, as
+ // doing so will cause a warning in the constructor of UnitTest.
+ // Mutable state in *impl_ is protected by mutex_.
+ internal::UnitTestImpl* impl_;
+
+ // We disallow copying UnitTest.
+ UnitTest(const UnitTest&) = delete;
+ UnitTest& operator=(const UnitTest&) = delete;
+};
+
+// A convenient wrapper for adding an environment for the test
+// program.
+//
+// You should call this before RUN_ALL_TESTS() is called, probably in
+// main(). If you use gtest_main, you need to call this before main()
+// starts for it to take effect. For example, you can define a global
+// variable like this:
+//
+// testing::Environment* const foo_env =
+// testing::AddGlobalTestEnvironment(new FooEnvironment);
+//
+// However, we strongly recommend you to write your own main() and
+// call AddGlobalTestEnvironment() there, as relying on initialization
+// of global variables makes the code harder to read and may cause
+// problems when you register multiple environments from different
+// translation units and the environments have dependencies among them
+// (remember that the compiler doesn't guarantee the order in which
+// global variables from different translation units are initialized).
+inline Environment* AddGlobalTestEnvironment(Environment* env) {
+ return UnitTest::GetInstance()->AddEnvironment(env);
+}
+
+// Initializes Google Test. This must be called before calling
+// RUN_ALL_TESTS(). In particular, it parses a command line for the
+// flags that Google Test recognizes. Whenever a Google Test flag is
+// seen, it is removed from argv, and *argc is decremented.
+//
+// No value is returned. Instead, the Google Test flag variables are
+// updated.
+//
+// Calling the function for the second time has no user-visible effect.
+GTEST_API_ void InitGoogleTest(int* argc, char** argv);
+
+// This overloaded version can be used in Windows programs compiled in
+// UNICODE mode.
+GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv);
+
+// This overloaded version can be used on Arduino/embedded platforms where
+// there is no argc/argv.
+GTEST_API_ void InitGoogleTest();
+
+namespace internal {
+
+// Separate the error generating code from the code path to reduce the stack
+// frame size of CmpHelperEQ. This helps reduce the overhead of some sanitizers
+// when calling EXPECT_* in a tight loop.
+template <typename T1, typename T2>
+AssertionResult CmpHelperEQFailure(const char* lhs_expression,
+ const char* rhs_expression, const T1& lhs,
+ const T2& rhs) {
+ return EqFailure(lhs_expression, rhs_expression,
+ FormatForComparisonFailureMessage(lhs, rhs),
+ FormatForComparisonFailureMessage(rhs, lhs), false);
+}
+
+// This block of code defines operator==/!=
+// to block lexical scope lookup.
+// It prevents using invalid operator==/!= defined at namespace scope.
+struct faketype {};
+inline bool operator==(faketype, faketype) { return true; }
+inline bool operator!=(faketype, faketype) { return false; }
+
+// The helper function for {ASSERT|EXPECT}_EQ.
+template <typename T1, typename T2>
+AssertionResult CmpHelperEQ(const char* lhs_expression,
+ const char* rhs_expression, const T1& lhs,
+ const T2& rhs) {
+ if (lhs == rhs) {
+ return AssertionSuccess();
+ }
+
+ return CmpHelperEQFailure(lhs_expression, rhs_expression, lhs, rhs);
+}
+
+class EqHelper {
+ public:
+ // This templatized version is for the general case.
+ template <
+ typename T1, typename T2,
+ // Disable this overload for cases where one argument is a pointer
+ // and the other is the null pointer constant.
+ typename std::enable_if<!std::is_integral<T1>::value ||
+ !std::is_pointer<T2>::value>::type* = nullptr>
+ static AssertionResult Compare(const char* lhs_expression,
+ const char* rhs_expression, const T1& lhs,
+ const T2& rhs) {
+ return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
+ }
+
+ // With this overloaded version, we allow anonymous enums to be used
+ // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous
+ // enums can be implicitly cast to BiggestInt.
+ //
+ // Even though its body looks the same as the above version, we
+ // cannot merge the two, as it will make anonymous enums unhappy.
+ static AssertionResult Compare(const char* lhs_expression,
+ const char* rhs_expression, BiggestInt lhs,
+ BiggestInt rhs) {
+ return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
+ }
+
+ template <typename T>
+ static AssertionResult Compare(
+ const char* lhs_expression, const char* rhs_expression,
+ // Handle cases where '0' is used as a null pointer literal.
+ std::nullptr_t /* lhs */, T* rhs) {
+ // We already know that 'lhs' is a null pointer.
+ return CmpHelperEQ(lhs_expression, rhs_expression, static_cast<T*>(nullptr),
+ rhs);
+ }
+};
+
+// Separate the error generating code from the code path to reduce the stack
+// frame size of CmpHelperOP. This helps reduce the overhead of some sanitizers
+// when calling EXPECT_OP in a tight loop.
+template <typename T1, typename T2>
+AssertionResult CmpHelperOpFailure(const char* expr1, const char* expr2,
+ const T1& val1, const T2& val2,
+ const char* op) {
+ return AssertionFailure()
+ << "Expected: (" << expr1 << ") " << op << " (" << expr2
+ << "), actual: " << FormatForComparisonFailureMessage(val1, val2)
+ << " vs " << FormatForComparisonFailureMessage(val2, val1);
+}
+
+// A macro for implementing the helper functions needed to implement
+// ASSERT_?? and EXPECT_??. It is here just to avoid copy-and-paste
+// of similar code.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+
+#define GTEST_IMPL_CMP_HELPER_(op_name, op) \
+ template <typename T1, typename T2> \
+ AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
+ const T1& val1, const T2& val2) { \
+ if (val1 op val2) { \
+ return AssertionSuccess(); \
+ } else { \
+ return CmpHelperOpFailure(expr1, expr2, val1, val2, #op); \
+ } \
+ }
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+
+// Implements the helper function for {ASSERT|EXPECT}_NE
+GTEST_IMPL_CMP_HELPER_(NE, !=)
+// Implements the helper function for {ASSERT|EXPECT}_LE
+GTEST_IMPL_CMP_HELPER_(LE, <=)
+// Implements the helper function for {ASSERT|EXPECT}_LT
+GTEST_IMPL_CMP_HELPER_(LT, <)
+// Implements the helper function for {ASSERT|EXPECT}_GE
+GTEST_IMPL_CMP_HELPER_(GE, >=)
+// Implements the helper function for {ASSERT|EXPECT}_GT
+GTEST_IMPL_CMP_HELPER_(GT, >)
+
+#undef GTEST_IMPL_CMP_HELPER_
+
+// The helper function for {ASSERT|EXPECT}_STREQ.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1, const char* s2);
+
+// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1, const char* s2);
+
+// The helper function for {ASSERT|EXPECT}_STRNE.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1, const char* s2);
+
+// The helper function for {ASSERT|EXPECT}_STRCASENE.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1, const char* s2);
+
+// Helper function for *_STREQ on wide strings.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression,
+ const char* s2_expression,
+ const wchar_t* s1, const wchar_t* s2);
+
+// Helper function for *_STRNE on wide strings.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression,
+ const wchar_t* s1, const wchar_t* s2);
+
+} // namespace internal
+
+// IsSubstring() and IsNotSubstring() are intended to be used as the
+// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by
+// themselves. They check whether needle is a substring of haystack
+// (NULL is considered a substring of itself only), and return an
+// appropriate error message when they fail.
+//
+// The {needle,haystack}_expr arguments are the stringified
+// expressions that generated the two real arguments.
+GTEST_API_ AssertionResult IsSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const char* needle,
+ const char* haystack);
+GTEST_API_ AssertionResult IsSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const wchar_t* needle,
+ const wchar_t* haystack);
+GTEST_API_ AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const char* needle,
+ const char* haystack);
+GTEST_API_ AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const wchar_t* needle,
+ const wchar_t* haystack);
+GTEST_API_ AssertionResult IsSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::string& needle,
+ const ::std::string& haystack);
+GTEST_API_ AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::string& needle,
+ const ::std::string& haystack);
+
+#if GTEST_HAS_STD_WSTRING
+GTEST_API_ AssertionResult IsSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::wstring& needle,
+ const ::std::wstring& haystack);
+GTEST_API_ AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::wstring& needle,
+ const ::std::wstring& haystack);
+#endif // GTEST_HAS_STD_WSTRING
+
+namespace internal {
+
+// Helper template function for comparing floating-points.
+//
+// Template parameter:
+//
+// RawType: the raw floating-point type (either float or double)
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+template <typename RawType>
+AssertionResult CmpHelperFloatingPointEQ(const char* lhs_expression,
+ const char* rhs_expression,
+ RawType lhs_value, RawType rhs_value) {
+ const FloatingPoint<RawType> lhs(lhs_value), rhs(rhs_value);
+
+ if (lhs.AlmostEquals(rhs)) {
+ return AssertionSuccess();
+ }
+
+ ::std::stringstream lhs_ss;
+ lhs_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+ << lhs_value;
+
+ ::std::stringstream rhs_ss;
+ rhs_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+ << rhs_value;
+
+ return EqFailure(lhs_expression, rhs_expression,
+ StringStreamToString(&lhs_ss), StringStreamToString(&rhs_ss),
+ false);
+}
+
+// Helper function for implementing ASSERT_NEAR.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1,
+ const char* expr2,
+ const char* abs_error_expr,
+ double val1, double val2,
+ double abs_error);
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// A class that enables one to stream messages to assertion macros
+class GTEST_API_ AssertHelper {
+ public:
+ // Constructor.
+ AssertHelper(TestPartResult::Type type, const char* file, int line,
+ const char* message);
+ ~AssertHelper();
+
+ // Message assignment is a semantic trick to enable assertion
+ // streaming; see the GTEST_MESSAGE_ macro below.
+ void operator=(const Message& message) const;
+
+ private:
+ // We put our data in a struct so that the size of the AssertHelper class can
+ // be as small as possible. This is important because gcc is incapable of
+ // re-using stack space even for temporary variables, so every EXPECT_EQ
+ // reserves stack space for another AssertHelper.
+ struct AssertHelperData {
+ AssertHelperData(TestPartResult::Type t, const char* srcfile, int line_num,
+ const char* msg)
+ : type(t), file(srcfile), line(line_num), message(msg) {}
+
+ TestPartResult::Type const type;
+ const char* const file;
+ int const line;
+ std::string const message;
+
+ private:
+ AssertHelperData(const AssertHelperData&) = delete;
+ AssertHelperData& operator=(const AssertHelperData&) = delete;
+ };
+
+ AssertHelperData* const data_;
+
+ AssertHelper(const AssertHelper&) = delete;
+ AssertHelper& operator=(const AssertHelper&) = delete;
+};
+
+} // namespace internal
+
+// The pure interface class that all value-parameterized tests inherit from.
+// A value-parameterized class must inherit from both ::testing::Test and
+// ::testing::WithParamInterface. In most cases that just means inheriting
+// from ::testing::TestWithParam, but more complicated test hierarchies
+// may need to inherit from Test and WithParamInterface at different levels.
+//
+// This interface has support for accessing the test parameter value via
+// the GetParam() method.
+//
+// Use it with one of the parameter generator defining functions, like Range(),
+// Values(), ValuesIn(), Bool(), and Combine().
+//
+// class FooTest : public ::testing::TestWithParam<int> {
+// protected:
+// FooTest() {
+// // Can use GetParam() here.
+// }
+// ~FooTest() override {
+// // Can use GetParam() here.
+// }
+// void SetUp() override {
+// // Can use GetParam() here.
+// }
+// void TearDown override {
+// // Can use GetParam() here.
+// }
+// };
+// TEST_P(FooTest, DoesBar) {
+// // Can use GetParam() method here.
+// Foo foo;
+// ASSERT_TRUE(foo.DoesBar(GetParam()));
+// }
+// INSTANTIATE_TEST_SUITE_P(OneToTenRange, FooTest, ::testing::Range(1, 10));
+
+template <typename T>
+class WithParamInterface {
+ public:
+ typedef T ParamType;
+ virtual ~WithParamInterface() {}
+
+ // The current parameter value. Is also available in the test fixture's
+ // constructor.
+ static const ParamType& GetParam() {
+ GTEST_CHECK_(parameter_ != nullptr)
+ << "GetParam() can only be called inside a value-parameterized test "
+ << "-- did you intend to write TEST_P instead of TEST_F?";
+ return *parameter_;
+ }
+
+ private:
+ // Sets parameter value. The caller is responsible for making sure the value
+ // remains alive and unchanged throughout the current test.
+ static void SetParam(const ParamType* parameter) { parameter_ = parameter; }
+
+ // Static value used for accessing parameter during a test lifetime.
+ static const ParamType* parameter_;
+
+ // TestClass must be a subclass of WithParamInterface<T> and Test.
+ template <class TestClass>
+ friend class internal::ParameterizedTestFactory;
+};
+
+template <typename T>
+const T* WithParamInterface<T>::parameter_ = nullptr;
+
+// Most value-parameterized classes can ignore the existence of
+// WithParamInterface, and can just inherit from ::testing::TestWithParam.
+
+template <typename T>
+class TestWithParam : public Test, public WithParamInterface<T> {};
+
+// Macros for indicating success/failure in test code.
+
+// Skips test in runtime.
+// Skipping test aborts current function.
+// Skipped tests are neither successful nor failed.
+#define GTEST_SKIP() GTEST_SKIP_("")
+
+// ADD_FAILURE unconditionally adds a failure to the current test.
+// SUCCEED generates a success - it doesn't automatically make the
+// current test successful, as a test is only successful when it has
+// no failure.
+//
+// EXPECT_* verifies that a certain condition is satisfied. If not,
+// it behaves like ADD_FAILURE. In particular:
+//
+// EXPECT_TRUE verifies that a Boolean condition is true.
+// EXPECT_FALSE verifies that a Boolean condition is false.
+//
+// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except
+// that they will also abort the current function on failure. People
+// usually want the fail-fast behavior of FAIL and ASSERT_*, but those
+// writing data-driven tests often find themselves using ADD_FAILURE
+// and EXPECT_* more.
+
+// Generates a nonfatal failure with a generic message.
+#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed")
+
+// Generates a nonfatal failure at the given source file location with
+// a generic message.
+#define ADD_FAILURE_AT(file, line) \
+ GTEST_MESSAGE_AT_(file, line, "Failed", \
+ ::testing::TestPartResult::kNonFatalFailure)
+
+// Generates a fatal failure with a generic message.
+#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed")
+
+// Like GTEST_FAIL(), but at the given source file location.
+#define GTEST_FAIL_AT(file, line) \
+ GTEST_MESSAGE_AT_(file, line, "Failed", \
+ ::testing::TestPartResult::kFatalFailure)
+
+// Define this macro to 1 to omit the definition of FAIL(), which is a
+// generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_FAIL
+#define FAIL() GTEST_FAIL()
+#endif
+
+// Generates a success with a generic message.
+#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded")
+
+// Define this macro to 1 to omit the definition of SUCCEED(), which
+// is a generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_SUCCEED
+#define SUCCEED() GTEST_SUCCEED()
+#endif
+
+// Macros for testing exceptions.
+//
+// * {ASSERT|EXPECT}_THROW(statement, expected_exception):
+// Tests that the statement throws the expected exception.
+// * {ASSERT|EXPECT}_NO_THROW(statement):
+// Tests that the statement doesn't throw any exception.
+// * {ASSERT|EXPECT}_ANY_THROW(statement):
+// Tests that the statement throws an exception.
+
+#define EXPECT_THROW(statement, expected_exception) \
+ GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_NO_THROW(statement) \
+ GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_ANY_THROW(statement) \
+ GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_THROW(statement, expected_exception) \
+ GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_)
+#define ASSERT_NO_THROW(statement) \
+ GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_)
+#define ASSERT_ANY_THROW(statement) \
+ GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_)
+
+// Boolean assertions. Condition can be either a Boolean expression or an
+// AssertionResult. For more information on how to use AssertionResult with
+// these macros see comments on that class.
+#define GTEST_EXPECT_TRUE(condition) \
+ GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
+ GTEST_NONFATAL_FAILURE_)
+#define GTEST_EXPECT_FALSE(condition) \
+ GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
+ GTEST_NONFATAL_FAILURE_)
+#define GTEST_ASSERT_TRUE(condition) \
+ GTEST_TEST_BOOLEAN_(condition, #condition, false, true, GTEST_FATAL_FAILURE_)
+#define GTEST_ASSERT_FALSE(condition) \
+ GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
+ GTEST_FATAL_FAILURE_)
+
+// Define these macros to 1 to omit the definition of the corresponding
+// EXPECT or ASSERT, which clashes with some users' own code.
+
+#if !GTEST_DONT_DEFINE_EXPECT_TRUE
+#define EXPECT_TRUE(condition) GTEST_EXPECT_TRUE(condition)
+#endif
+
+#if !GTEST_DONT_DEFINE_EXPECT_FALSE
+#define EXPECT_FALSE(condition) GTEST_EXPECT_FALSE(condition)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_TRUE
+#define ASSERT_TRUE(condition) GTEST_ASSERT_TRUE(condition)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_FALSE
+#define ASSERT_FALSE(condition) GTEST_ASSERT_FALSE(condition)
+#endif
+
+// Macros for testing equalities and inequalities.
+//
+// * {ASSERT|EXPECT}_EQ(v1, v2): Tests that v1 == v2
+// * {ASSERT|EXPECT}_NE(v1, v2): Tests that v1 != v2
+// * {ASSERT|EXPECT}_LT(v1, v2): Tests that v1 < v2
+// * {ASSERT|EXPECT}_LE(v1, v2): Tests that v1 <= v2
+// * {ASSERT|EXPECT}_GT(v1, v2): Tests that v1 > v2
+// * {ASSERT|EXPECT}_GE(v1, v2): Tests that v1 >= v2
+//
+// When they are not, Google Test prints both the tested expressions and
+// their actual values. The values must be compatible built-in types,
+// or you will get a compiler error. By "compatible" we mean that the
+// values can be compared by the respective operator.
+//
+// Note:
+//
+// 1. It is possible to make a user-defined type work with
+// {ASSERT|EXPECT}_??(), but that requires overloading the
+// comparison operators and is thus discouraged by the Google C++
+// Usage Guide. Therefore, you are advised to use the
+// {ASSERT|EXPECT}_TRUE() macro to assert that two objects are
+// equal.
+//
+// 2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on
+// pointers (in particular, C strings). Therefore, if you use it
+// with two C strings, you are testing how their locations in memory
+// are related, not how their content is related. To compare two C
+// strings by content, use {ASSERT|EXPECT}_STR*().
+//
+// 3. {ASSERT|EXPECT}_EQ(v1, v2) is preferred to
+// {ASSERT|EXPECT}_TRUE(v1 == v2), as the former tells you
+// what the actual value is when it fails, and similarly for the
+// other comparisons.
+//
+// 4. Do not depend on the order in which {ASSERT|EXPECT}_??()
+// evaluate their arguments, which is undefined.
+//
+// 5. These macros evaluate their arguments exactly once.
+//
+// Examples:
+//
+// EXPECT_NE(Foo(), 5);
+// EXPECT_EQ(a_pointer, NULL);
+// ASSERT_LT(i, array_size);
+// ASSERT_GT(records.size(), 0) << "There is no record left.";
+
+#define EXPECT_EQ(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::EqHelper::Compare, val1, val2)
+#define EXPECT_NE(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
+#define EXPECT_LE(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
+#define EXPECT_LT(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
+#define EXPECT_GE(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
+#define EXPECT_GT(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)
+
+#define GTEST_ASSERT_EQ(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::EqHelper::Compare, val1, val2)
+#define GTEST_ASSERT_NE(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
+#define GTEST_ASSERT_LE(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
+#define GTEST_ASSERT_LT(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
+#define GTEST_ASSERT_GE(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
+#define GTEST_ASSERT_GT(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)
+
+// Define macro GTEST_DONT_DEFINE_ASSERT_XY to 1 to omit the definition of
+// ASSERT_XY(), which clashes with some users' own code.
+
+#if !GTEST_DONT_DEFINE_ASSERT_EQ
+#define ASSERT_EQ(val1, val2) GTEST_ASSERT_EQ(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_NE
+#define ASSERT_NE(val1, val2) GTEST_ASSERT_NE(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_LE
+#define ASSERT_LE(val1, val2) GTEST_ASSERT_LE(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_LT
+#define ASSERT_LT(val1, val2) GTEST_ASSERT_LT(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_GE
+#define ASSERT_GE(val1, val2) GTEST_ASSERT_GE(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_GT
+#define ASSERT_GT(val1, val2) GTEST_ASSERT_GT(val1, val2)
+#endif
+
+// C-string Comparisons. All tests treat NULL and any non-NULL string
+// as different. Two NULLs are equal.
+//
+// * {ASSERT|EXPECT}_STREQ(s1, s2): Tests that s1 == s2
+// * {ASSERT|EXPECT}_STRNE(s1, s2): Tests that s1 != s2
+// * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case
+// * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case
+//
+// For wide or narrow string objects, you can use the
+// {ASSERT|EXPECT}_??() macros.
+//
+// Don't depend on the order in which the arguments are evaluated,
+// which is undefined.
+//
+// These macros evaluate their arguments exactly once.
+
+#define EXPECT_STREQ(s1, s2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, s1, s2)
+#define EXPECT_STRNE(s1, s2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
+#define EXPECT_STRCASEEQ(s1, s2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, s1, s2)
+#define EXPECT_STRCASENE(s1, s2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)
+
+#define ASSERT_STREQ(s1, s2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, s1, s2)
+#define ASSERT_STRNE(s1, s2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
+#define ASSERT_STRCASEEQ(s1, s2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, s1, s2)
+#define ASSERT_STRCASENE(s1, s2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)
+
+// Macros for comparing floating-point numbers.
+//
+// * {ASSERT|EXPECT}_FLOAT_EQ(val1, val2):
+// Tests that two float values are almost equal.
+// * {ASSERT|EXPECT}_DOUBLE_EQ(val1, val2):
+// Tests that two double values are almost equal.
+// * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error):
+// Tests that v1 and v2 are within the given distance to each other.
+//
+// Google Test uses ULP-based comparison to automatically pick a default
+// error bound that is appropriate for the operands. See the
+// FloatingPoint template class in gtest-internal.h if you are
+// interested in the implementation details.
+
+#define EXPECT_FLOAT_EQ(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
+ val1, val2)
+
+#define EXPECT_DOUBLE_EQ(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
+ val1, val2)
+
+#define ASSERT_FLOAT_EQ(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
+ val1, val2)
+
+#define ASSERT_DOUBLE_EQ(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
+ val1, val2)
+
+#define EXPECT_NEAR(val1, val2, abs_error) \
+ EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, val1, val2, \
+ abs_error)
+
+#define ASSERT_NEAR(val1, val2, abs_error) \
+ ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, val1, val2, \
+ abs_error)
+
+// These predicate format functions work on floating-point values, and
+// can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g.
+//
+// EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0);
+
+// Asserts that val1 is less than, or almost equal to, val2. Fails
+// otherwise. In particular, it fails if either val1 or val2 is NaN.
+GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2,
+ float val1, float val2);
+GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
+ double val1, double val2);
+
+#if GTEST_OS_WINDOWS
+
+// Macros that test for HRESULT failure and success, these are only useful
+// on Windows, and rely on Windows SDK macros and APIs to compile.
+//
+// * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr)
+//
+// When expr unexpectedly fails or succeeds, Google Test prints the
+// expected result and the actual result with both a human-readable
+// string representation of the error, if available, as well as the
+// hex result code.
+#define EXPECT_HRESULT_SUCCEEDED(expr) \
+ EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))
+
+#define ASSERT_HRESULT_SUCCEEDED(expr) \
+ ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))
+
+#define EXPECT_HRESULT_FAILED(expr) \
+ EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))
+
+#define ASSERT_HRESULT_FAILED(expr) \
+ ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))
+
+#endif // GTEST_OS_WINDOWS
+
+// Macros that execute statement and check that it doesn't generate new fatal
+// failures in the current thread.
+//
+// * {ASSERT|EXPECT}_NO_FATAL_FAILURE(statement);
+//
+// Examples:
+//
+// EXPECT_NO_FATAL_FAILURE(Process());
+// ASSERT_NO_FATAL_FAILURE(Process()) << "Process() failed";
+//
+#define ASSERT_NO_FATAL_FAILURE(statement) \
+ GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_FATAL_FAILURE_)
+#define EXPECT_NO_FATAL_FAILURE(statement) \
+ GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_)
+
+// Causes a trace (including the given source file path and line number,
+// and the given message) to be included in every test failure message generated
+// by code in the scope of the lifetime of an instance of this class. The effect
+// is undone with the destruction of the instance.
+//
+// The message argument can be anything streamable to std::ostream.
+//
+// Example:
+// testing::ScopedTrace trace("file.cc", 123, "message");
+//
+class GTEST_API_ ScopedTrace {
+ public:
+ // The c'tor pushes the given source file location and message onto
+ // a trace stack maintained by Google Test.
+
+ // Template version. Uses Message() to convert the values into strings.
+ // Slow, but flexible.
+ template <typename T>
+ ScopedTrace(const char* file, int line, const T& message) {
+ PushTrace(file, line, (Message() << message).GetString());
+ }
+
+ // Optimize for some known types.
+ ScopedTrace(const char* file, int line, const char* message) {
+ PushTrace(file, line, message ? message : "(null)");
+ }
+
+ ScopedTrace(const char* file, int line, const std::string& message) {
+ PushTrace(file, line, message);
+ }
+
+ // The d'tor pops the info pushed by the c'tor.
+ //
+ // Note that the d'tor is not virtual in order to be efficient.
+ // Don't inherit from ScopedTrace!
+ ~ScopedTrace();
+
+ private:
+ void PushTrace(const char* file, int line, std::string message);
+
+ ScopedTrace(const ScopedTrace&) = delete;
+ ScopedTrace& operator=(const ScopedTrace&) = delete;
+} GTEST_ATTRIBUTE_UNUSED_; // A ScopedTrace object does its job in its
+ // c'tor and d'tor. Therefore it doesn't
+ // need to be used otherwise.
+
+// Causes a trace (including the source file path, the current line
+// number, and the given message) to be included in every test failure
+// message generated by code in the current scope. The effect is
+// undone when the control leaves the current scope.
+//
+// The message argument can be anything streamable to std::ostream.
+//
+// In the implementation, we include the current line number as part
+// of the dummy variable name, thus allowing multiple SCOPED_TRACE()s
+// to appear in the same block - as long as they are on different
+// lines.
+//
+// Assuming that each thread maintains its own stack of traces.
+// Therefore, a SCOPED_TRACE() would (correctly) only affect the
+// assertions in its own thread.
+#define SCOPED_TRACE(message) \
+ ::testing::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)( \
+ __FILE__, __LINE__, (message))
+
+// Compile-time assertion for type equality.
+// StaticAssertTypeEq<type1, type2>() compiles if and only if type1 and type2
+// are the same type. The value it returns is not interesting.
+//
+// Instead of making StaticAssertTypeEq a class template, we make it a
+// function template that invokes a helper class template. This
+// prevents a user from misusing StaticAssertTypeEq<T1, T2> by
+// defining objects of that type.
+//
+// CAVEAT:
+//
+// When used inside a method of a class template,
+// StaticAssertTypeEq<T1, T2>() is effective ONLY IF the method is
+// instantiated. For example, given:
+//
+// template <typename T> class Foo {
+// public:
+// void Bar() { testing::StaticAssertTypeEq<int, T>(); }
+// };
+//
+// the code:
+//
+// void Test1() { Foo<bool> foo; }
+//
+// will NOT generate a compiler error, as Foo<bool>::Bar() is never
+// actually instantiated. Instead, you need:
+//
+// void Test2() { Foo<bool> foo; foo.Bar(); }
+//
+// to cause a compiler error.
+template <typename T1, typename T2>
+constexpr bool StaticAssertTypeEq() noexcept {
+ static_assert(std::is_same<T1, T2>::value, "T1 and T2 are not the same type");
+ return true;
+}
+
+// Defines a test.
+//
+// The first parameter is the name of the test suite, and the second
+// parameter is the name of the test within the test suite.
+//
+// The convention is to end the test suite name with "Test". For
+// example, a test suite for the Foo class can be named FooTest.
+//
+// Test code should appear between braces after an invocation of
+// this macro. Example:
+//
+// TEST(FooTest, InitializesCorrectly) {
+// Foo foo;
+// EXPECT_TRUE(foo.StatusIsOK());
+// }
+
+// Note that we call GetTestTypeId() instead of GetTypeId<
+// ::testing::Test>() here to get the type ID of testing::Test. This
+// is to work around a suspected linker bug when using Google Test as
+// a framework on Mac OS X. The bug causes GetTypeId<
+// ::testing::Test>() to return different values depending on whether
+// the call is from the Google Test framework itself or from user test
+// code. GetTestTypeId() is guaranteed to always return the same
+// value, as it always calls GetTypeId<>() from the Google Test
+// framework.
+#define GTEST_TEST(test_suite_name, test_name) \
+ GTEST_TEST_(test_suite_name, test_name, ::testing::Test, \
+ ::testing::internal::GetTestTypeId())
+
+// Define this macro to 1 to omit the definition of TEST(), which
+// is a generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_TEST
+#define TEST(test_suite_name, test_name) GTEST_TEST(test_suite_name, test_name)
+#endif
+
+// Defines a test that uses a test fixture.
+//
+// The first parameter is the name of the test fixture class, which
+// also doubles as the test suite name. The second parameter is the
+// name of the test within the test suite.
+//
+// A test fixture class must be declared earlier. The user should put
+// the test code between braces after using this macro. Example:
+//
+// class FooTest : public testing::Test {
+// protected:
+// void SetUp() override { b_.AddElement(3); }
+//
+// Foo a_;
+// Foo b_;
+// };
+//
+// TEST_F(FooTest, InitializesCorrectly) {
+// EXPECT_TRUE(a_.StatusIsOK());
+// }
+//
+// TEST_F(FooTest, ReturnsElementCountCorrectly) {
+// EXPECT_EQ(a_.size(), 0);
+// EXPECT_EQ(b_.size(), 1);
+// }
+#define GTEST_TEST_F(test_fixture, test_name) \
+ GTEST_TEST_(test_fixture, test_name, test_fixture, \
+ ::testing::internal::GetTypeId<test_fixture>())
+#if !GTEST_DONT_DEFINE_TEST_F
+#define TEST_F(test_fixture, test_name) GTEST_TEST_F(test_fixture, test_name)
+#endif
+
+// Returns a path to temporary directory.
+// Tries to determine an appropriate directory for the platform.
+GTEST_API_ std::string TempDir();
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+// Dynamically registers a test with the framework.
+//
+// This is an advanced API only to be used when the `TEST` macros are
+// insufficient. The macros should be preferred when possible, as they avoid
+// most of the complexity of calling this function.
+//
+// The `factory` argument is a factory callable (move-constructible) object or
+// function pointer that creates a new instance of the Test object. It
+// handles ownership to the caller. The signature of the callable is
+// `Fixture*()`, where `Fixture` is the test fixture class for the test. All
+// tests registered with the same `test_suite_name` must return the same
+// fixture type. This is checked at runtime.
+//
+// The framework will infer the fixture class from the factory and will call
+// the `SetUpTestSuite` and `TearDownTestSuite` for it.
+//
+// Must be called before `RUN_ALL_TESTS()` is invoked, otherwise behavior is
+// undefined.
+//
+// Use case example:
+//
+// class MyFixture : public ::testing::Test {
+// public:
+// // All of these optional, just like in regular macro usage.
+// static void SetUpTestSuite() { ... }
+// static void TearDownTestSuite() { ... }
+// void SetUp() override { ... }
+// void TearDown() override { ... }
+// };
+//
+// class MyTest : public MyFixture {
+// public:
+// explicit MyTest(int data) : data_(data) {}
+// void TestBody() override { ... }
+//
+// private:
+// int data_;
+// };
+//
+// void RegisterMyTests(const std::vector<int>& values) {
+// for (int v : values) {
+// ::testing::RegisterTest(
+// "MyFixture", ("Test" + std::to_string(v)).c_str(), nullptr,
+// std::to_string(v).c_str(),
+// __FILE__, __LINE__,
+// // Important to use the fixture type as the return type here.
+// [=]() -> MyFixture* { return new MyTest(v); });
+// }
+// }
+// ...
+// int main(int argc, char** argv) {
+// ::testing::InitGoogleTest(&argc, argv);
+// std::vector<int> values_to_test = LoadValuesFromConfig();
+// RegisterMyTests(values_to_test);
+// ...
+// return RUN_ALL_TESTS();
+// }
+//
+template <int&... ExplicitParameterBarrier, typename Factory>
+TestInfo* RegisterTest(const char* test_suite_name, const char* test_name,
+ const char* type_param, const char* value_param,
+ const char* file, int line, Factory factory) {
+ using TestT = typename std::remove_pointer<decltype(factory())>::type;
+
+ class FactoryImpl : public internal::TestFactoryBase {
+ public:
+ explicit FactoryImpl(Factory f) : factory_(std::move(f)) {}
+ Test* CreateTest() override { return factory_(); }
+
+ private:
+ Factory factory_;
+ };
+
+ return internal::MakeAndRegisterTestInfo(
+ test_suite_name, test_name, type_param, value_param,
+ internal::CodeLocation(file, line), internal::GetTypeId<TestT>(),
+ internal::SuiteApiResolver<TestT>::GetSetUpCaseOrSuite(file, line),
+ internal::SuiteApiResolver<TestT>::GetTearDownCaseOrSuite(file, line),
+ new FactoryImpl{std::move(factory)});
+}
+
+} // namespace testing
+
+// Use this function in main() to run all tests. It returns 0 if all
+// tests are successful, or 1 otherwise.
+//
+// RUN_ALL_TESTS() should be invoked after the command line has been
+// parsed by InitGoogleTest().
+//
+// This function was formerly a macro; thus, it is in the global
+// namespace and has an all-caps name.
+int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_;
+
+inline int RUN_ALL_TESTS() { return ::testing::UnitTest::GetInstance()->Run(); }
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_pred_impl.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_pred_impl.h
new file mode 100644
index 0000000000..47a24aa687
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_pred_impl.h
@@ -0,0 +1,279 @@
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Implements a family of generic predicate assertion macros.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+
+#include "gtest/gtest-assertion-result.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+
+// This header implements a family of generic predicate assertion
+// macros:
+//
+// ASSERT_PRED_FORMAT1(pred_format, v1)
+// ASSERT_PRED_FORMAT2(pred_format, v1, v2)
+// ...
+//
+// where pred_format is a function or functor that takes n (in the
+// case of ASSERT_PRED_FORMATn) values and their source expression
+// text, and returns a testing::AssertionResult. See the definition
+// of ASSERT_EQ in gtest.h for an example.
+//
+// If you don't care about formatting, you can use the more
+// restrictive version:
+//
+// ASSERT_PRED1(pred, v1)
+// ASSERT_PRED2(pred, v1, v2)
+// ...
+//
+// where pred is an n-ary function or functor that returns bool,
+// and the values v1, v2, ..., must support the << operator for
+// streaming to std::ostream.
+//
+// We also define the EXPECT_* variations.
+//
+// For now we only support predicates whose arity is at most 5.
+// Please email googletestframework@googlegroups.com if you need
+// support for higher arities.
+
+// GTEST_ASSERT_ is the basic statement to which all of the assertions
+// in this file reduce. Don't use this in your code.
+
+#define GTEST_ASSERT_(expression, on_failure) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (const ::testing::AssertionResult gtest_ar = (expression)) \
+ ; \
+ else \
+ on_failure(gtest_ar.failure_message())
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED1. Don't use
+// this in your code.
+template <typename Pred, typename T1>
+AssertionResult AssertPred1Helper(const char* pred_text, const char* e1,
+ Pred pred, const T1& v1) {
+ if (pred(v1)) return AssertionSuccess();
+
+ return AssertionFailure()
+ << pred_text << "(" << e1 << ") evaluates to false, where"
+ << "\n"
+ << e1 << " evaluates to " << ::testing::PrintToString(v1);
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure) \
+ GTEST_ASSERT_(pred_format(#v1, v1), on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED1. Don't use
+// this in your code.
+#define GTEST_PRED1_(pred, v1, on_failure) \
+ GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, #v1, pred, v1), on_failure)
+
+// Unary predicate assertion macros.
+#define EXPECT_PRED_FORMAT1(pred_format, v1) \
+ GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED1(pred, v1) GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT1(pred_format, v1) \
+ GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED1(pred, v1) GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED2. Don't use
+// this in your code.
+template <typename Pred, typename T1, typename T2>
+AssertionResult AssertPred2Helper(const char* pred_text, const char* e1,
+ const char* e2, Pred pred, const T1& v1,
+ const T2& v2) {
+ if (pred(v1, v2)) return AssertionSuccess();
+
+ return AssertionFailure()
+ << pred_text << "(" << e1 << ", " << e2
+ << ") evaluates to false, where"
+ << "\n"
+ << e1 << " evaluates to " << ::testing::PrintToString(v1) << "\n"
+ << e2 << " evaluates to " << ::testing::PrintToString(v2);
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure) \
+ GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED2. Don't use
+// this in your code.
+#define GTEST_PRED2_(pred, v1, v2, on_failure) \
+ GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, #v1, #v2, pred, v1, v2), \
+ on_failure)
+
+// Binary predicate assertion macros.
+#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
+ GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED2(pred, v1, v2) \
+ GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
+ GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED2(pred, v1, v2) \
+ GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED3. Don't use
+// this in your code.
+template <typename Pred, typename T1, typename T2, typename T3>
+AssertionResult AssertPred3Helper(const char* pred_text, const char* e1,
+ const char* e2, const char* e3, Pred pred,
+ const T1& v1, const T2& v2, const T3& v3) {
+ if (pred(v1, v2, v3)) return AssertionSuccess();
+
+ return AssertionFailure()
+ << pred_text << "(" << e1 << ", " << e2 << ", " << e3
+ << ") evaluates to false, where"
+ << "\n"
+ << e1 << " evaluates to " << ::testing::PrintToString(v1) << "\n"
+ << e2 << " evaluates to " << ::testing::PrintToString(v2) << "\n"
+ << e3 << " evaluates to " << ::testing::PrintToString(v3);
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure) \
+ GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED3. Don't use
+// this in your code.
+#define GTEST_PRED3_(pred, v1, v2, v3, on_failure) \
+ GTEST_ASSERT_( \
+ ::testing::AssertPred3Helper(#pred, #v1, #v2, #v3, pred, v1, v2, v3), \
+ on_failure)
+
+// Ternary predicate assertion macros.
+#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+ GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED3(pred, v1, v2, v3) \
+ GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+ GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED3(pred, v1, v2, v3) \
+ GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED4. Don't use
+// this in your code.
+template <typename Pred, typename T1, typename T2, typename T3, typename T4>
+AssertionResult AssertPred4Helper(const char* pred_text, const char* e1,
+ const char* e2, const char* e3,
+ const char* e4, Pred pred, const T1& v1,
+ const T2& v2, const T3& v3, const T4& v4) {
+ if (pred(v1, v2, v3, v4)) return AssertionSuccess();
+
+ return AssertionFailure()
+ << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
+ << ") evaluates to false, where"
+ << "\n"
+ << e1 << " evaluates to " << ::testing::PrintToString(v1) << "\n"
+ << e2 << " evaluates to " << ::testing::PrintToString(v2) << "\n"
+ << e3 << " evaluates to " << ::testing::PrintToString(v3) << "\n"
+ << e4 << " evaluates to " << ::testing::PrintToString(v4);
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure) \
+ GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED4. Don't use
+// this in your code.
+#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure) \
+ GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, #v1, #v2, #v3, #v4, pred, \
+ v1, v2, v3, v4), \
+ on_failure)
+
+// 4-ary predicate assertion macros.
+#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+ GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
+ GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+ GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
+ GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED5. Don't use
+// this in your code.
+template <typename Pred, typename T1, typename T2, typename T3, typename T4,
+ typename T5>
+AssertionResult AssertPred5Helper(const char* pred_text, const char* e1,
+ const char* e2, const char* e3,
+ const char* e4, const char* e5, Pred pred,
+ const T1& v1, const T2& v2, const T3& v3,
+ const T4& v4, const T5& v5) {
+ if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess();
+
+ return AssertionFailure()
+ << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
+ << ", " << e5 << ") evaluates to false, where"
+ << "\n"
+ << e1 << " evaluates to " << ::testing::PrintToString(v1) << "\n"
+ << e2 << " evaluates to " << ::testing::PrintToString(v2) << "\n"
+ << e3 << " evaluates to " << ::testing::PrintToString(v3) << "\n"
+ << e4 << " evaluates to " << ::testing::PrintToString(v4) << "\n"
+ << e5 << " evaluates to " << ::testing::PrintToString(v5);
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure) \
+ GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \
+ on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED5. Don't use
+// this in your code.
+#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure) \
+ GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, #v1, #v2, #v3, #v4, #v5, \
+ pred, v1, v2, v3, v4, v5), \
+ on_failure)
+
+// 5-ary predicate assertion macros.
+#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+ GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
+ GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+ GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
+ GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+
+} // namespace testing
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_prod.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_prod.h
new file mode 100644
index 0000000000..1f37dc31c3
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_prod.h
@@ -0,0 +1,60 @@
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Google C++ Testing and Mocking Framework definitions useful in production
+// code.
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
+
+// When you need to test the private or protected members of a class,
+// use the FRIEND_TEST macro to declare your tests as friends of the
+// class. For example:
+//
+// class MyClass {
+// private:
+// void PrivateMethod();
+// FRIEND_TEST(MyClassTest, PrivateMethodWorks);
+// };
+//
+// class MyClassTest : public testing::Test {
+// // ...
+// };
+//
+// TEST_F(MyClassTest, PrivateMethodWorks) {
+// // Can call MyClass::PrivateMethod() here.
+// }
+//
+// Note: The test class must be in the same namespace as the class being tested.
+// For example, putting MyClassTest in an anonymous namespace will not work.
+
+#define FRIEND_TEST(test_case_name, test_name) \
+ friend class test_case_name##_##test_name##_Test
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/README.md b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/README.md
new file mode 100644
index 0000000000..cb49e2c754
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/README.md
@@ -0,0 +1,44 @@
+# Customization Points
+
+The custom directory is an injection point for custom user configurations.
+
+## Header `gtest.h`
+
+### The following macros can be defined:
+
+* `GTEST_OS_STACK_TRACE_GETTER_` - The name of an implementation of
+ `OsStackTraceGetterInterface`.
+* `GTEST_CUSTOM_TEMPDIR_FUNCTION_` - An override for `testing::TempDir()`. See
+ `testing::TempDir` for semantics and signature.
+
+## Header `gtest-port.h`
+
+The following macros can be defined:
+
+### Logging:
+
+* `GTEST_LOG_(severity)`
+* `GTEST_CHECK_(condition)`
+* Functions `LogToStderr()` and `FlushInfoLog()` have to be provided too.
+
+### Threading:
+
+* `GTEST_HAS_NOTIFICATION_` - Enabled if Notification is already provided.
+* `GTEST_HAS_MUTEX_AND_THREAD_LOCAL_` - Enabled if `Mutex` and `ThreadLocal`
+ are already provided. Must also provide `GTEST_DECLARE_STATIC_MUTEX_(mutex)`
+ and `GTEST_DEFINE_STATIC_MUTEX_(mutex)`
+* `GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)`
+* `GTEST_LOCK_EXCLUDED_(locks)`
+
+### Underlying library support features
+
+* `GTEST_HAS_CXXABI_H_`
+
+### Exporting API symbols:
+
+* `GTEST_API_` - Specifier for exported symbols.
+
+## Header `gtest-printers.h`
+
+* See documentation at `gtest/gtest-printers.h` for details on how to define a
+ custom printer.
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-port.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-port.h
new file mode 100644
index 0000000000..9b7fb4261a
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-port.h
@@ -0,0 +1,68 @@
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Injection point for custom user configurations. See README for details
+//
+// ** Custom implementation starts here **
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
+
+// Use a stub Notification class.
+//
+// The built-in Notification class in GoogleTest v1.12.1 uses std::mutex and
+// std::condition_variable. The <mutex> and <condition_variable> headers of
+// mingw32 g++ (GNU 10.0.0) define std::mutex and std::condition_variable only
+// when configured with the posix threads option but don't define them when
+// configured with the win32 threads option. The Notification class is only
+// used in GoogleTest's internal tests. Since we don't build GoogleTest's
+// internal tests, we don't need a working Notification class. Although it's
+// not hard to fix the mingw32 g++ compilation errors by implementing the
+// Notification class using Windows CRITICAL_SECTION and CONDITION_VARIABLE,
+// it's simpler to just use a stub Notification class on all platforms.
+//
+// The default constructor of the stub class is deleted and the declaration of
+// the Notify() method is commented out, so that compilation will fail if any
+// code actually uses the Notification class.
+
+#define GTEST_HAS_NOTIFICATION_ 1
+namespace testing {
+namespace internal {
+class Notification {
+ public:
+ Notification() = delete;
+ Notification(const Notification&) = delete;
+ Notification& operator=(const Notification&) = delete;
+ // void Notify();
+ void WaitForNotification() {}
+};
+} // namespace internal
+} // namespace testing
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-printers.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-printers.h
new file mode 100644
index 0000000000..b9495d8378
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-printers.h
@@ -0,0 +1,42 @@
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// This file provides an injection point for custom printers in a local
+// installation of gTest.
+// It will be included from gtest-printers.h and the overrides in this file
+// will be visible to everyone.
+//
+// Injection point for custom user configurations. See README for details
+//
+// ** Custom implementation starts here **
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest.h
new file mode 100644
index 0000000000..afaaf17ba2
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest.h
@@ -0,0 +1,37 @@
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Injection point for custom user configurations. See README for details
+//
+// ** Custom implementation starts here **
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-death-test-internal.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-death-test-internal.h
new file mode 100644
index 0000000000..45580ae805
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-death-test-internal.h
@@ -0,0 +1,306 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines internal utilities needed for implementing
+// death tests. They are subject to change without notice.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+
+#include <stdio.h>
+
+#include <memory>
+
+#include "gtest/gtest-matchers.h"
+#include "gtest/internal/gtest-internal.h"
+
+GTEST_DECLARE_string_(internal_run_death_test);
+
+namespace testing {
+namespace internal {
+
+// Names of the flags (needed for parsing Google Test flags).
+const char kDeathTestStyleFlag[] = "death_test_style";
+const char kDeathTestUseFork[] = "death_test_use_fork";
+const char kInternalRunDeathTestFlag[] = "internal_run_death_test";
+
+#if GTEST_HAS_DEATH_TEST
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// DeathTest is a class that hides much of the complexity of the
+// GTEST_DEATH_TEST_ macro. It is abstract; its static Create method
+// returns a concrete class that depends on the prevailing death test
+// style, as defined by the --gtest_death_test_style and/or
+// --gtest_internal_run_death_test flags.
+
+// In describing the results of death tests, these terms are used with
+// the corresponding definitions:
+//
+// exit status: The integer exit information in the format specified
+// by wait(2)
+// exit code: The integer code passed to exit(3), _exit(2), or
+// returned from main()
+class GTEST_API_ DeathTest {
+ public:
+ // Create returns false if there was an error determining the
+ // appropriate action to take for the current death test; for example,
+ // if the gtest_death_test_style flag is set to an invalid value.
+ // The LastMessage method will return a more detailed message in that
+ // case. Otherwise, the DeathTest pointer pointed to by the "test"
+ // argument is set. If the death test should be skipped, the pointer
+ // is set to NULL; otherwise, it is set to the address of a new concrete
+ // DeathTest object that controls the execution of the current test.
+ static bool Create(const char* statement, Matcher<const std::string&> matcher,
+ const char* file, int line, DeathTest** test);
+ DeathTest();
+ virtual ~DeathTest() {}
+
+ // A helper class that aborts a death test when it's deleted.
+ class ReturnSentinel {
+ public:
+ explicit ReturnSentinel(DeathTest* test) : test_(test) {}
+ ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }
+
+ private:
+ DeathTest* const test_;
+ ReturnSentinel(const ReturnSentinel&) = delete;
+ ReturnSentinel& operator=(const ReturnSentinel&) = delete;
+ } GTEST_ATTRIBUTE_UNUSED_;
+
+ // An enumeration of possible roles that may be taken when a death
+ // test is encountered. EXECUTE means that the death test logic should
+ // be executed immediately. OVERSEE means that the program should prepare
+ // the appropriate environment for a child process to execute the death
+ // test, then wait for it to complete.
+ enum TestRole { OVERSEE_TEST, EXECUTE_TEST };
+
+ // An enumeration of the three reasons that a test might be aborted.
+ enum AbortReason {
+ TEST_ENCOUNTERED_RETURN_STATEMENT,
+ TEST_THREW_EXCEPTION,
+ TEST_DID_NOT_DIE
+ };
+
+ // Assumes one of the above roles.
+ virtual TestRole AssumeRole() = 0;
+
+ // Waits for the death test to finish and returns its status.
+ virtual int Wait() = 0;
+
+ // Returns true if the death test passed; that is, the test process
+ // exited during the test, its exit status matches a user-supplied
+ // predicate, and its stderr output matches a user-supplied regular
+ // expression.
+ // The user-supplied predicate may be a macro expression rather
+ // than a function pointer or functor, or else Wait and Passed could
+ // be combined.
+ virtual bool Passed(bool exit_status_ok) = 0;
+
+ // Signals that the death test did not die as expected.
+ virtual void Abort(AbortReason reason) = 0;
+
+ // Returns a human-readable outcome message regarding the outcome of
+ // the last death test.
+ static const char* LastMessage();
+
+ static void set_last_death_test_message(const std::string& message);
+
+ private:
+ // A string containing a description of the outcome of the last death test.
+ static std::string last_death_test_message_;
+
+ DeathTest(const DeathTest&) = delete;
+ DeathTest& operator=(const DeathTest&) = delete;
+};
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+
+// Factory interface for death tests. May be mocked out for testing.
+class DeathTestFactory {
+ public:
+ virtual ~DeathTestFactory() {}
+ virtual bool Create(const char* statement,
+ Matcher<const std::string&> matcher, const char* file,
+ int line, DeathTest** test) = 0;
+};
+
+// A concrete DeathTestFactory implementation for normal use.
+class DefaultDeathTestFactory : public DeathTestFactory {
+ public:
+ bool Create(const char* statement, Matcher<const std::string&> matcher,
+ const char* file, int line, DeathTest** test) override;
+};
+
+// Returns true if exit_status describes a process that was terminated
+// by a signal, or exited normally with a nonzero exit code.
+GTEST_API_ bool ExitedUnsuccessfully(int exit_status);
+
+// A string passed to EXPECT_DEATH (etc.) is caught by one of these overloads
+// and interpreted as a regex (rather than an Eq matcher) for legacy
+// compatibility.
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+ ::testing::internal::RE regex) {
+ return ContainsRegex(regex.pattern());
+}
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(const char* regex) {
+ return ContainsRegex(regex);
+}
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+ const ::std::string& regex) {
+ return ContainsRegex(regex);
+}
+
+// If a Matcher<const ::std::string&> is passed to EXPECT_DEATH (etc.), it's
+// used directly.
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+ Matcher<const ::std::string&> matcher) {
+ return matcher;
+}
+
+// Traps C++ exceptions escaping statement and reports them as test
+// failures. Note that trapping SEH exceptions is not implemented here.
+#if GTEST_HAS_EXCEPTIONS
+#define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
+ try { \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ } catch (const ::std::exception& gtest_exception) { \
+ fprintf( \
+ stderr, \
+ "\n%s: Caught std::exception-derived exception escaping the " \
+ "death test statement. Exception message: %s\n", \
+ ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \
+ gtest_exception.what()); \
+ fflush(stderr); \
+ death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
+ } catch (...) { \
+ death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
+ }
+
+#else
+#define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
+
+#endif
+
+// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*,
+// ASSERT_EXIT*, and EXPECT_EXIT*.
+#define GTEST_DEATH_TEST_(statement, predicate, regex_or_matcher, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ ::testing::internal::DeathTest* gtest_dt; \
+ if (!::testing::internal::DeathTest::Create( \
+ #statement, \
+ ::testing::internal::MakeDeathTestMatcher(regex_or_matcher), \
+ __FILE__, __LINE__, &gtest_dt)) { \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
+ } \
+ if (gtest_dt != nullptr) { \
+ std::unique_ptr< ::testing::internal::DeathTest> gtest_dt_ptr(gtest_dt); \
+ switch (gtest_dt->AssumeRole()) { \
+ case ::testing::internal::DeathTest::OVERSEE_TEST: \
+ if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
+ } \
+ break; \
+ case ::testing::internal::DeathTest::EXECUTE_TEST: { \
+ ::testing::internal::DeathTest::ReturnSentinel gtest_sentinel( \
+ gtest_dt); \
+ GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \
+ gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
+ break; \
+ } \
+ } \
+ } \
+ } else \
+ GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__) \
+ : fail(::testing::internal::DeathTest::LastMessage())
+// The symbol "fail" here expands to something into which a message
+// can be streamed.
+
+// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in
+// NDEBUG mode. In this case we need the statements to be executed and the macro
+// must accept a streamed message even though the message is never printed.
+// The regex object is not evaluated, but it is used to prevent "unused"
+// warnings and to avoid an expression that doesn't compile in debug mode.
+#define GTEST_EXECUTE_STATEMENT_(statement, regex_or_matcher) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ } else if (!::testing::internal::AlwaysTrue()) { \
+ ::testing::internal::MakeDeathTestMatcher(regex_or_matcher); \
+ } else \
+ ::testing::Message()
+
+// A class representing the parsed contents of the
+// --gtest_internal_run_death_test flag, as it existed when
+// RUN_ALL_TESTS was called.
+class InternalRunDeathTestFlag {
+ public:
+ InternalRunDeathTestFlag(const std::string& a_file, int a_line, int an_index,
+ int a_write_fd)
+ : file_(a_file), line_(a_line), index_(an_index), write_fd_(a_write_fd) {}
+
+ ~InternalRunDeathTestFlag() {
+ if (write_fd_ >= 0) posix::Close(write_fd_);
+ }
+
+ const std::string& file() const { return file_; }
+ int line() const { return line_; }
+ int index() const { return index_; }
+ int write_fd() const { return write_fd_; }
+
+ private:
+ std::string file_;
+ int line_;
+ int index_;
+ int write_fd_;
+
+ InternalRunDeathTestFlag(const InternalRunDeathTestFlag&) = delete;
+ InternalRunDeathTestFlag& operator=(const InternalRunDeathTestFlag&) = delete;
+};
+
+// Returns a newly created InternalRunDeathTestFlag object with fields
+// initialized from the GTEST_FLAG(internal_run_death_test) flag if
+// the flag is specified; otherwise returns NULL.
+InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();
+
+#endif // GTEST_HAS_DEATH_TEST
+
+} // namespace internal
+} // namespace testing
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-filepath.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-filepath.h
new file mode 100644
index 0000000000..a2a60a962b
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-filepath.h
@@ -0,0 +1,210 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Google Test filepath utilities
+//
+// This header file declares classes and functions used internally by
+// Google Test. They are subject to change without notice.
+//
+// This file is #included in gtest/internal/gtest-internal.h.
+// Do not include this header file separately!
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+
+#include "gtest/internal/gtest-string.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+namespace testing {
+namespace internal {
+
+// FilePath - a class for file and directory pathname manipulation which
+// handles platform-specific conventions (like the pathname separator).
+// Used for helper functions for naming files in a directory for xml output.
+// Except for Set methods, all methods are const or static, which provides an
+// "immutable value object" -- useful for peace of mind.
+// A FilePath with a value ending in a path separator ("like/this/") represents
+// a directory, otherwise it is assumed to represent a file. In either case,
+// it may or may not represent an actual file or directory in the file system.
+// Names are NOT checked for syntax correctness -- no checking for illegal
+// characters, malformed paths, etc.
+
+class GTEST_API_ FilePath {
+ public:
+ FilePath() : pathname_("") {}
+ FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) {}
+
+ explicit FilePath(const std::string& pathname) : pathname_(pathname) {
+ Normalize();
+ }
+
+ FilePath& operator=(const FilePath& rhs) {
+ Set(rhs);
+ return *this;
+ }
+
+ void Set(const FilePath& rhs) { pathname_ = rhs.pathname_; }
+
+ const std::string& string() const { return pathname_; }
+ const char* c_str() const { return pathname_.c_str(); }
+
+ // Returns the current working directory, or "" if unsuccessful.
+ static FilePath GetCurrentDir();
+
+ // Given directory = "dir", base_name = "test", number = 0,
+ // extension = "xml", returns "dir/test.xml". If number is greater
+ // than zero (e.g., 12), returns "dir/test_12.xml".
+ // On Windows platform, uses \ as the separator rather than /.
+ static FilePath MakeFileName(const FilePath& directory,
+ const FilePath& base_name, int number,
+ const char* extension);
+
+ // Given directory = "dir", relative_path = "test.xml",
+ // returns "dir/test.xml".
+ // On Windows, uses \ as the separator rather than /.
+ static FilePath ConcatPaths(const FilePath& directory,
+ const FilePath& relative_path);
+
+ // Returns a pathname for a file that does not currently exist. The pathname
+ // will be directory/base_name.extension or
+ // directory/base_name_<number>.extension if directory/base_name.extension
+ // already exists. The number will be incremented until a pathname is found
+ // that does not already exist.
+ // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
+ // There could be a race condition if two or more processes are calling this
+ // function at the same time -- they could both pick the same filename.
+ static FilePath GenerateUniqueFileName(const FilePath& directory,
+ const FilePath& base_name,
+ const char* extension);
+
+ // Returns true if and only if the path is "".
+ bool IsEmpty() const { return pathname_.empty(); }
+
+ // If input name has a trailing separator character, removes it and returns
+ // the name, otherwise return the name string unmodified.
+ // On Windows platform, uses \ as the separator, other platforms use /.
+ FilePath RemoveTrailingPathSeparator() const;
+
+ // Returns a copy of the FilePath with the directory part removed.
+ // Example: FilePath("path/to/file").RemoveDirectoryName() returns
+ // FilePath("file"). If there is no directory part ("just_a_file"), it returns
+ // the FilePath unmodified. If there is no file part ("just_a_dir/") it
+ // returns an empty FilePath ("").
+ // On Windows platform, '\' is the path separator, otherwise it is '/'.
+ FilePath RemoveDirectoryName() const;
+
+ // RemoveFileName returns the directory path with the filename removed.
+ // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
+ // If the FilePath is "a_file" or "/a_file", RemoveFileName returns
+ // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
+ // not have a file, like "just/a/dir/", it returns the FilePath unmodified.
+ // On Windows platform, '\' is the path separator, otherwise it is '/'.
+ FilePath RemoveFileName() const;
+
+ // Returns a copy of the FilePath with the case-insensitive extension removed.
+ // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
+ // FilePath("dir/file"). If a case-insensitive extension is not
+ // found, returns a copy of the original FilePath.
+ FilePath RemoveExtension(const char* extension) const;
+
+ // Creates directories so that path exists. Returns true if successful or if
+ // the directories already exist; returns false if unable to create
+ // directories for any reason. Will also return false if the FilePath does
+ // not represent a directory (that is, it doesn't end with a path separator).
+ bool CreateDirectoriesRecursively() const;
+
+ // Create the directory so that path exists. Returns true if successful or
+ // if the directory already exists; returns false if unable to create the
+ // directory for any reason, including if the parent directory does not
+ // exist. Not named "CreateDirectory" because that's a macro on Windows.
+ bool CreateFolder() const;
+
+ // Returns true if FilePath describes something in the file-system,
+ // either a file, directory, or whatever, and that something exists.
+ bool FileOrDirectoryExists() const;
+
+ // Returns true if pathname describes a directory in the file-system
+ // that exists.
+ bool DirectoryExists() const;
+
+ // Returns true if FilePath ends with a path separator, which indicates that
+ // it is intended to represent a directory. Returns false otherwise.
+ // This does NOT check that a directory (or file) actually exists.
+ bool IsDirectory() const;
+
+ // Returns true if pathname describes a root directory. (Windows has one
+ // root directory per disk drive.)
+ bool IsRootDirectory() const;
+
+ // Returns true if pathname describes an absolute path.
+ bool IsAbsolutePath() const;
+
+ private:
+ // Replaces multiple consecutive separators with a single separator.
+ // For example, "bar///foo" becomes "bar/foo". Does not eliminate other
+ // redundancies that might be in a pathname involving "." or "..".
+ //
+ // A pathname with multiple consecutive separators may occur either through
+ // user error or as a result of some scripts or APIs that generate a pathname
+ // with a trailing separator. On other platforms the same API or script
+ // may NOT generate a pathname with a trailing "/". Then elsewhere that
+ // pathname may have another "/" and pathname components added to it,
+ // without checking for the separator already being there.
+ // The script language and operating system may allow paths like "foo//bar"
+ // but some of the functions in FilePath will not handle that correctly. In
+ // particular, RemoveTrailingPathSeparator() only removes one separator, and
+ // it is called in CreateDirectoriesRecursively() assuming that it will change
+ // a pathname from directory syntax (trailing separator) to filename syntax.
+ //
+ // On Windows this method also replaces the alternate path separator '/' with
+ // the primary path separator '\\', so that for example "bar\\/\\foo" becomes
+ // "bar\\foo".
+
+ void Normalize();
+
+ // Returns a pointer to the last occurrence of a valid path separator in
+ // the FilePath. On Windows, for example, both '/' and '\' are valid path
+ // separators. Returns NULL if no path separator was found.
+ const char* FindLastPathSeparator() const;
+
+ std::string pathname_;
+}; // class FilePath
+
+} // namespace internal
+} // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-internal.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-internal.h
new file mode 100644
index 0000000000..9b04e4c85f
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-internal.h
@@ -0,0 +1,1570 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file declares functions and macros used internally by
+// Google Test. They are subject to change without notice.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+
+#include "gtest/internal/gtest-port.h"
+
+#if GTEST_OS_LINUX
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#endif // GTEST_OS_LINUX
+
+#if GTEST_HAS_EXCEPTIONS
+#include <stdexcept>
+#endif
+
+#include <ctype.h>
+#include <float.h>
+#include <string.h>
+
+#include <cstdint>
+#include <iomanip>
+#include <limits>
+#include <map>
+#include <set>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "gtest/gtest-message.h"
+#include "gtest/internal/gtest-filepath.h"
+#include "gtest/internal/gtest-string.h"
+#include "gtest/internal/gtest-type-util.h"
+
+// Due to C++ preprocessor weirdness, we need double indirection to
+// concatenate two tokens when one of them is __LINE__. Writing
+//
+// foo ## __LINE__
+//
+// will result in the token foo__LINE__, instead of foo followed by
+// the current line number. For more details, see
+// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6
+#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar)
+#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo##bar
+
+// Stringifies its argument.
+// Work around a bug in visual studio which doesn't accept code like this:
+//
+// #define GTEST_STRINGIFY_(name) #name
+// #define MACRO(a, b, c) ... GTEST_STRINGIFY_(a) ...
+// MACRO(, x, y)
+//
+// Complaining about the argument to GTEST_STRINGIFY_ being empty.
+// This is allowed by the spec.
+#define GTEST_STRINGIFY_HELPER_(name, ...) #name
+#define GTEST_STRINGIFY_(...) GTEST_STRINGIFY_HELPER_(__VA_ARGS__, )
+
+namespace proto2 {
+class MessageLite;
+}
+
+namespace testing {
+
+// Forward declarations.
+
+class AssertionResult; // Result of an assertion.
+class Message; // Represents a failure message.
+class Test; // Represents a test.
+class TestInfo; // Information about a test.
+class TestPartResult; // Result of a test part.
+class UnitTest; // A collection of test suites.
+
+template <typename T>
+::std::string PrintToString(const T& value);
+
+namespace internal {
+
+struct TraceInfo; // Information about a trace point.
+class TestInfoImpl; // Opaque implementation of TestInfo
+class UnitTestImpl; // Opaque implementation of UnitTest
+
+// The text used in failure messages to indicate the start of the
+// stack trace.
+GTEST_API_ extern const char kStackTraceMarker[];
+
+// An IgnoredValue object can be implicitly constructed from ANY value.
+class IgnoredValue {
+ struct Sink {};
+
+ public:
+ // This constructor template allows any value to be implicitly
+ // converted to IgnoredValue. The object has no data member and
+ // doesn't try to remember anything about the argument. We
+ // deliberately omit the 'explicit' keyword in order to allow the
+ // conversion to be implicit.
+ // Disable the conversion if T already has a magical conversion operator.
+ // Otherwise we get ambiguity.
+ template <typename T,
+ typename std::enable_if<!std::is_convertible<T, Sink>::value,
+ int>::type = 0>
+ IgnoredValue(const T& /* ignored */) {} // NOLINT(runtime/explicit)
+};
+
+// Appends the user-supplied message to the Google-Test-generated message.
+GTEST_API_ std::string AppendUserMessage(const std::string& gtest_msg,
+ const Message& user_msg);
+
+#if GTEST_HAS_EXCEPTIONS
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(
+ 4275 /* an exported class was derived from a class that was not exported */)
+
+// This exception is thrown by (and only by) a failed Google Test
+// assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions
+// are enabled). We derive it from std::runtime_error, which is for
+// errors presumably detectable only at run time. Since
+// std::runtime_error inherits from std::exception, many testing
+// frameworks know how to extract and print the message inside it.
+class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error {
+ public:
+ explicit GoogleTestFailureException(const TestPartResult& failure);
+};
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4275
+
+#endif // GTEST_HAS_EXCEPTIONS
+
+namespace edit_distance {
+// Returns the optimal edits to go from 'left' to 'right'.
+// All edits cost the same, with replace having lower priority than
+// add/remove.
+// Simple implementation of the Wagner-Fischer algorithm.
+// See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm
+enum EditType { kMatch, kAdd, kRemove, kReplace };
+GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
+ const std::vector<size_t>& left, const std::vector<size_t>& right);
+
+// Same as above, but the input is represented as strings.
+GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
+ const std::vector<std::string>& left,
+ const std::vector<std::string>& right);
+
+// Create a diff of the input strings in Unified diff format.
+GTEST_API_ std::string CreateUnifiedDiff(const std::vector<std::string>& left,
+ const std::vector<std::string>& right,
+ size_t context = 2);
+
+} // namespace edit_distance
+
+// Constructs and returns the message for an equality assertion
+// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
+//
+// The first four parameters are the expressions used in the assertion
+// and their values, as strings. For example, for ASSERT_EQ(foo, bar)
+// where foo is 5 and bar is 6, we have:
+//
+// expected_expression: "foo"
+// actual_expression: "bar"
+// expected_value: "5"
+// actual_value: "6"
+//
+// The ignoring_case parameter is true if and only if the assertion is a
+// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will
+// be inserted into the message.
+GTEST_API_ AssertionResult EqFailure(const char* expected_expression,
+ const char* actual_expression,
+ const std::string& expected_value,
+ const std::string& actual_value,
+ bool ignoring_case);
+
+// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
+GTEST_API_ std::string GetBoolAssertionFailureMessage(
+ const AssertionResult& assertion_result, const char* expression_text,
+ const char* actual_predicate_value, const char* expected_predicate_value);
+
+// This template class represents an IEEE floating-point number
+// (either single-precision or double-precision, depending on the
+// template parameters).
+//
+// The purpose of this class is to do more sophisticated number
+// comparison. (Due to round-off error, etc, it's very unlikely that
+// two floating-points will be equal exactly. Hence a naive
+// comparison by the == operation often doesn't work.)
+//
+// Format of IEEE floating-point:
+//
+// The most-significant bit being the leftmost, an IEEE
+// floating-point looks like
+//
+// sign_bit exponent_bits fraction_bits
+//
+// Here, sign_bit is a single bit that designates the sign of the
+// number.
+//
+// For float, there are 8 exponent bits and 23 fraction bits.
+//
+// For double, there are 11 exponent bits and 52 fraction bits.
+//
+// More details can be found at
+// http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
+//
+// Template parameter:
+//
+// RawType: the raw floating-point type (either float or double)
+template <typename RawType>
+class FloatingPoint {
+ public:
+ // Defines the unsigned integer type that has the same size as the
+ // floating point number.
+ typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits;
+
+ // Constants.
+
+ // # of bits in a number.
+ static const size_t kBitCount = 8 * sizeof(RawType);
+
+ // # of fraction bits in a number.
+ static const size_t kFractionBitCount =
+ std::numeric_limits<RawType>::digits - 1;
+
+ // # of exponent bits in a number.
+ static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;
+
+ // The mask for the sign bit.
+ static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);
+
+ // The mask for the fraction bits.
+ static const Bits kFractionBitMask = ~static_cast<Bits>(0) >>
+ (kExponentBitCount + 1);
+
+ // The mask for the exponent bits.
+ static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);
+
+ // How many ULP's (Units in the Last Place) we want to tolerate when
+ // comparing two numbers. The larger the value, the more error we
+ // allow. A 0 value means that two numbers must be exactly the same
+ // to be considered equal.
+ //
+ // The maximum error of a single floating-point operation is 0.5
+ // units in the last place. On Intel CPU's, all floating-point
+ // calculations are done with 80-bit precision, while double has 64
+ // bits. Therefore, 4 should be enough for ordinary use.
+ //
+ // See the following article for more details on ULP:
+ // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
+ static const uint32_t kMaxUlps = 4;
+
+ // Constructs a FloatingPoint from a raw floating-point number.
+ //
+ // On an Intel CPU, passing a non-normalized NAN (Not a Number)
+ // around may change its bits, although the new value is guaranteed
+ // to be also a NAN. Therefore, don't expect this constructor to
+ // preserve the bits in x when x is a NAN.
+ explicit FloatingPoint(const RawType& x) { u_.value_ = x; }
+
+ // Static methods
+
+ // Reinterprets a bit pattern as a floating-point number.
+ //
+ // This function is needed to test the AlmostEquals() method.
+ static RawType ReinterpretBits(const Bits bits) {
+ FloatingPoint fp(0);
+ fp.u_.bits_ = bits;
+ return fp.u_.value_;
+ }
+
+ // Returns the floating-point number that represent positive infinity.
+ static RawType Infinity() { return ReinterpretBits(kExponentBitMask); }
+
+ // Returns the maximum representable finite floating-point number.
+ static RawType Max();
+
+ // Non-static methods
+
+ // Returns the bits that represents this number.
+ const Bits& bits() const { return u_.bits_; }
+
+ // Returns the exponent bits of this number.
+ Bits exponent_bits() const { return kExponentBitMask & u_.bits_; }
+
+ // Returns the fraction bits of this number.
+ Bits fraction_bits() const { return kFractionBitMask & u_.bits_; }
+
+ // Returns the sign bit of this number.
+ Bits sign_bit() const { return kSignBitMask & u_.bits_; }
+
+ // Returns true if and only if this is NAN (not a number).
+ bool is_nan() const {
+ // It's a NAN if the exponent bits are all ones and the fraction
+ // bits are not entirely zeros.
+ return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
+ }
+
+ // Returns true if and only if this number is at most kMaxUlps ULP's away
+ // from rhs. In particular, this function:
+ //
+ // - returns false if either number is (or both are) NAN.
+ // - treats really large numbers as almost equal to infinity.
+ // - thinks +0.0 and -0.0 are 0 DLP's apart.
+ bool AlmostEquals(const FloatingPoint& rhs) const {
+ // The IEEE standard says that any comparison operation involving
+ // a NAN must return false.
+ if (is_nan() || rhs.is_nan()) return false;
+
+ return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_) <=
+ kMaxUlps;
+ }
+
+ private:
+ // The data type used to store the actual floating-point number.
+ union FloatingPointUnion {
+ RawType value_; // The raw floating-point number.
+ Bits bits_; // The bits that represent the number.
+ };
+
+ // Converts an integer from the sign-and-magnitude representation to
+ // the biased representation. More precisely, let N be 2 to the
+ // power of (kBitCount - 1), an integer x is represented by the
+ // unsigned number x + N.
+ //
+ // For instance,
+ //
+ // -N + 1 (the most negative number representable using
+ // sign-and-magnitude) is represented by 1;
+ // 0 is represented by N; and
+ // N - 1 (the biggest number representable using
+ // sign-and-magnitude) is represented by 2N - 1.
+ //
+ // Read http://en.wikipedia.org/wiki/Signed_number_representations
+ // for more details on signed number representations.
+ static Bits SignAndMagnitudeToBiased(const Bits& sam) {
+ if (kSignBitMask & sam) {
+ // sam represents a negative number.
+ return ~sam + 1;
+ } else {
+ // sam represents a positive number.
+ return kSignBitMask | sam;
+ }
+ }
+
+ // Given two numbers in the sign-and-magnitude representation,
+ // returns the distance between them as an unsigned number.
+ static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits& sam1,
+ const Bits& sam2) {
+ const Bits biased1 = SignAndMagnitudeToBiased(sam1);
+ const Bits biased2 = SignAndMagnitudeToBiased(sam2);
+ return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1);
+ }
+
+ FloatingPointUnion u_;
+};
+
+// We cannot use std::numeric_limits<T>::max() as it clashes with the max()
+// macro defined by <windows.h>.
+template <>
+inline float FloatingPoint<float>::Max() {
+ return FLT_MAX;
+}
+template <>
+inline double FloatingPoint<double>::Max() {
+ return DBL_MAX;
+}
+
+// Typedefs the instances of the FloatingPoint template class that we
+// care to use.
+typedef FloatingPoint<float> Float;
+typedef FloatingPoint<double> Double;
+
+// In order to catch the mistake of putting tests that use different
+// test fixture classes in the same test suite, we need to assign
+// unique IDs to fixture classes and compare them. The TypeId type is
+// used to hold such IDs. The user should treat TypeId as an opaque
+// type: the only operation allowed on TypeId values is to compare
+// them for equality using the == operator.
+typedef const void* TypeId;
+
+template <typename T>
+class TypeIdHelper {
+ public:
+ // dummy_ must not have a const type. Otherwise an overly eager
+ // compiler (e.g. MSVC 7.1 & 8.0) may try to merge
+ // TypeIdHelper<T>::dummy_ for different Ts as an "optimization".
+ static bool dummy_;
+};
+
+template <typename T>
+bool TypeIdHelper<T>::dummy_ = false;
+
+// GetTypeId<T>() returns the ID of type T. Different values will be
+// returned for different types. Calling the function twice with the
+// same type argument is guaranteed to return the same ID.
+template <typename T>
+TypeId GetTypeId() {
+ // The compiler is required to allocate a different
+ // TypeIdHelper<T>::dummy_ variable for each T used to instantiate
+ // the template. Therefore, the address of dummy_ is guaranteed to
+ // be unique.
+ return &(TypeIdHelper<T>::dummy_);
+}
+
+// Returns the type ID of ::testing::Test. Always call this instead
+// of GetTypeId< ::testing::Test>() to get the type ID of
+// ::testing::Test, as the latter may give the wrong result due to a
+// suspected linker bug when compiling Google Test as a Mac OS X
+// framework.
+GTEST_API_ TypeId GetTestTypeId();
+
+// Defines the abstract factory interface that creates instances
+// of a Test object.
+class TestFactoryBase {
+ public:
+ virtual ~TestFactoryBase() {}
+
+ // Creates a test instance to run. The instance is both created and destroyed
+ // within TestInfoImpl::Run()
+ virtual Test* CreateTest() = 0;
+
+ protected:
+ TestFactoryBase() {}
+
+ private:
+ TestFactoryBase(const TestFactoryBase&) = delete;
+ TestFactoryBase& operator=(const TestFactoryBase&) = delete;
+};
+
+// This class provides implementation of TeastFactoryBase interface.
+// It is used in TEST and TEST_F macros.
+template <class TestClass>
+class TestFactoryImpl : public TestFactoryBase {
+ public:
+ Test* CreateTest() override { return new TestClass; }
+};
+
+#if GTEST_OS_WINDOWS
+
+// Predicate-formatters for implementing the HRESULT checking macros
+// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}
+// We pass a long instead of HRESULT to avoid causing an
+// include dependency for the HRESULT type.
+GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr,
+ long hr); // NOLINT
+GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr,
+ long hr); // NOLINT
+
+#endif // GTEST_OS_WINDOWS
+
+// Types of SetUpTestSuite() and TearDownTestSuite() functions.
+using SetUpTestSuiteFunc = void (*)();
+using TearDownTestSuiteFunc = void (*)();
+
+struct CodeLocation {
+ CodeLocation(const std::string& a_file, int a_line)
+ : file(a_file), line(a_line) {}
+
+ std::string file;
+ int line;
+};
+
+// Helper to identify which setup function for TestCase / TestSuite to call.
+// Only one function is allowed, either TestCase or TestSute but not both.
+
+// Utility functions to help SuiteApiResolver
+using SetUpTearDownSuiteFuncType = void (*)();
+
+inline SetUpTearDownSuiteFuncType GetNotDefaultOrNull(
+ SetUpTearDownSuiteFuncType a, SetUpTearDownSuiteFuncType def) {
+ return a == def ? nullptr : a;
+}
+
+template <typename T>
+// Note that SuiteApiResolver inherits from T because
+// SetUpTestSuite()/TearDownTestSuite() could be protected. This way
+// SuiteApiResolver can access them.
+struct SuiteApiResolver : T {
+ // testing::Test is only forward declared at this point. So we make it a
+ // dependent class for the compiler to be OK with it.
+ using Test =
+ typename std::conditional<sizeof(T) != 0, ::testing::Test, void>::type;
+
+ static SetUpTearDownSuiteFuncType GetSetUpCaseOrSuite(const char* filename,
+ int line_num) {
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ SetUpTearDownSuiteFuncType test_case_fp =
+ GetNotDefaultOrNull(&T::SetUpTestCase, &Test::SetUpTestCase);
+ SetUpTearDownSuiteFuncType test_suite_fp =
+ GetNotDefaultOrNull(&T::SetUpTestSuite, &Test::SetUpTestSuite);
+
+ GTEST_CHECK_(!test_case_fp || !test_suite_fp)
+ << "Test can not provide both SetUpTestSuite and SetUpTestCase, please "
+ "make sure there is only one present at "
+ << filename << ":" << line_num;
+
+ return test_case_fp != nullptr ? test_case_fp : test_suite_fp;
+#else
+ (void)(filename);
+ (void)(line_num);
+ return &T::SetUpTestSuite;
+#endif
+ }
+
+ static SetUpTearDownSuiteFuncType GetTearDownCaseOrSuite(const char* filename,
+ int line_num) {
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ SetUpTearDownSuiteFuncType test_case_fp =
+ GetNotDefaultOrNull(&T::TearDownTestCase, &Test::TearDownTestCase);
+ SetUpTearDownSuiteFuncType test_suite_fp =
+ GetNotDefaultOrNull(&T::TearDownTestSuite, &Test::TearDownTestSuite);
+
+ GTEST_CHECK_(!test_case_fp || !test_suite_fp)
+ << "Test can not provide both TearDownTestSuite and TearDownTestCase,"
+ " please make sure there is only one present at"
+ << filename << ":" << line_num;
+
+ return test_case_fp != nullptr ? test_case_fp : test_suite_fp;
+#else
+ (void)(filename);
+ (void)(line_num);
+ return &T::TearDownTestSuite;
+#endif
+ }
+};
+
+// Creates a new TestInfo object and registers it with Google Test;
+// returns the created object.
+//
+// Arguments:
+//
+// test_suite_name: name of the test suite
+// name: name of the test
+// type_param: the name of the test's type parameter, or NULL if
+// this is not a typed or a type-parameterized test.
+// value_param: text representation of the test's value parameter,
+// or NULL if this is not a type-parameterized test.
+// code_location: code location where the test is defined
+// fixture_class_id: ID of the test fixture class
+// set_up_tc: pointer to the function that sets up the test suite
+// tear_down_tc: pointer to the function that tears down the test suite
+// factory: pointer to the factory that creates a test object.
+// The newly created TestInfo instance will assume
+// ownership of the factory object.
+GTEST_API_ TestInfo* MakeAndRegisterTestInfo(
+ const char* test_suite_name, const char* name, const char* type_param,
+ const char* value_param, CodeLocation code_location,
+ TypeId fixture_class_id, SetUpTestSuiteFunc set_up_tc,
+ TearDownTestSuiteFunc tear_down_tc, TestFactoryBase* factory);
+
+// If *pstr starts with the given prefix, modifies *pstr to be right
+// past the prefix and returns true; otherwise leaves *pstr unchanged
+// and returns false. None of pstr, *pstr, and prefix can be NULL.
+GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr);
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// State of the definition of a type-parameterized test suite.
+class GTEST_API_ TypedTestSuitePState {
+ public:
+ TypedTestSuitePState() : registered_(false) {}
+
+ // Adds the given test name to defined_test_names_ and return true
+ // if the test suite hasn't been registered; otherwise aborts the
+ // program.
+ bool AddTestName(const char* file, int line, const char* case_name,
+ const char* test_name) {
+ if (registered_) {
+ fprintf(stderr,
+ "%s Test %s must be defined before "
+ "REGISTER_TYPED_TEST_SUITE_P(%s, ...).\n",
+ FormatFileLocation(file, line).c_str(), test_name, case_name);
+ fflush(stderr);
+ posix::Abort();
+ }
+ registered_tests_.insert(
+ ::std::make_pair(test_name, CodeLocation(file, line)));
+ return true;
+ }
+
+ bool TestExists(const std::string& test_name) const {
+ return registered_tests_.count(test_name) > 0;
+ }
+
+ const CodeLocation& GetCodeLocation(const std::string& test_name) const {
+ RegisteredTestsMap::const_iterator it = registered_tests_.find(test_name);
+ GTEST_CHECK_(it != registered_tests_.end());
+ return it->second;
+ }
+
+ // Verifies that registered_tests match the test names in
+ // defined_test_names_; returns registered_tests if successful, or
+ // aborts the program otherwise.
+ const char* VerifyRegisteredTestNames(const char* test_suite_name,
+ const char* file, int line,
+ const char* registered_tests);
+
+ private:
+ typedef ::std::map<std::string, CodeLocation> RegisteredTestsMap;
+
+ bool registered_;
+ RegisteredTestsMap registered_tests_;
+};
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+using TypedTestCasePState = TypedTestSuitePState;
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+
+// Skips to the first non-space char after the first comma in 'str';
+// returns NULL if no comma is found in 'str'.
+inline const char* SkipComma(const char* str) {
+ const char* comma = strchr(str, ',');
+ if (comma == nullptr) {
+ return nullptr;
+ }
+ while (IsSpace(*(++comma))) {
+ }
+ return comma;
+}
+
+// Returns the prefix of 'str' before the first comma in it; returns
+// the entire string if it contains no comma.
+inline std::string GetPrefixUntilComma(const char* str) {
+ const char* comma = strchr(str, ',');
+ return comma == nullptr ? str : std::string(str, comma);
+}
+
+// Splits a given string on a given delimiter, populating a given
+// vector with the fields.
+void SplitString(const ::std::string& str, char delimiter,
+ ::std::vector<::std::string>* dest);
+
+// The default argument to the template below for the case when the user does
+// not provide a name generator.
+struct DefaultNameGenerator {
+ template <typename T>
+ static std::string GetName(int i) {
+ return StreamableToString(i);
+ }
+};
+
+template <typename Provided = DefaultNameGenerator>
+struct NameGeneratorSelector {
+ typedef Provided type;
+};
+
+template <typename NameGenerator>
+void GenerateNamesRecursively(internal::None, std::vector<std::string>*, int) {}
+
+template <typename NameGenerator, typename Types>
+void GenerateNamesRecursively(Types, std::vector<std::string>* result, int i) {
+ result->push_back(NameGenerator::template GetName<typename Types::Head>(i));
+ GenerateNamesRecursively<NameGenerator>(typename Types::Tail(), result,
+ i + 1);
+}
+
+template <typename NameGenerator, typename Types>
+std::vector<std::string> GenerateNames() {
+ std::vector<std::string> result;
+ GenerateNamesRecursively<NameGenerator>(Types(), &result, 0);
+ return result;
+}
+
+// TypeParameterizedTest<Fixture, TestSel, Types>::Register()
+// registers a list of type-parameterized tests with Google Test. The
+// return value is insignificant - we just need to return something
+// such that we can call this function in a namespace scope.
+//
+// Implementation note: The GTEST_TEMPLATE_ macro declares a template
+// template parameter. It's defined in gtest-type-util.h.
+template <GTEST_TEMPLATE_ Fixture, class TestSel, typename Types>
+class TypeParameterizedTest {
+ public:
+ // 'index' is the index of the test in the type list 'Types'
+ // specified in INSTANTIATE_TYPED_TEST_SUITE_P(Prefix, TestSuite,
+ // Types). Valid values for 'index' are [0, N - 1] where N is the
+ // length of Types.
+ static bool Register(const char* prefix, const CodeLocation& code_location,
+ const char* case_name, const char* test_names, int index,
+ const std::vector<std::string>& type_names =
+ GenerateNames<DefaultNameGenerator, Types>()) {
+ typedef typename Types::Head Type;
+ typedef Fixture<Type> FixtureClass;
+ typedef typename GTEST_BIND_(TestSel, Type) TestClass;
+
+ // First, registers the first type-parameterized test in the type
+ // list.
+ MakeAndRegisterTestInfo(
+ (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name +
+ "/" + type_names[static_cast<size_t>(index)])
+ .c_str(),
+ StripTrailingSpaces(GetPrefixUntilComma(test_names)).c_str(),
+ GetTypeName<Type>().c_str(),
+ nullptr, // No value parameter.
+ code_location, GetTypeId<FixtureClass>(),
+ SuiteApiResolver<TestClass>::GetSetUpCaseOrSuite(
+ code_location.file.c_str(), code_location.line),
+ SuiteApiResolver<TestClass>::GetTearDownCaseOrSuite(
+ code_location.file.c_str(), code_location.line),
+ new TestFactoryImpl<TestClass>);
+
+ // Next, recurses (at compile time) with the tail of the type list.
+ return TypeParameterizedTest<Fixture, TestSel,
+ typename Types::Tail>::Register(prefix,
+ code_location,
+ case_name,
+ test_names,
+ index + 1,
+ type_names);
+ }
+};
+
+// The base case for the compile time recursion.
+template <GTEST_TEMPLATE_ Fixture, class TestSel>
+class TypeParameterizedTest<Fixture, TestSel, internal::None> {
+ public:
+ static bool Register(const char* /*prefix*/, const CodeLocation&,
+ const char* /*case_name*/, const char* /*test_names*/,
+ int /*index*/,
+ const std::vector<std::string>& =
+ std::vector<std::string>() /*type_names*/) {
+ return true;
+ }
+};
+
+GTEST_API_ void RegisterTypeParameterizedTestSuite(const char* test_suite_name,
+ CodeLocation code_location);
+GTEST_API_ void RegisterTypeParameterizedTestSuiteInstantiation(
+ const char* case_name);
+
+// TypeParameterizedTestSuite<Fixture, Tests, Types>::Register()
+// registers *all combinations* of 'Tests' and 'Types' with Google
+// Test. The return value is insignificant - we just need to return
+// something such that we can call this function in a namespace scope.
+template <GTEST_TEMPLATE_ Fixture, typename Tests, typename Types>
+class TypeParameterizedTestSuite {
+ public:
+ static bool Register(const char* prefix, CodeLocation code_location,
+ const TypedTestSuitePState* state, const char* case_name,
+ const char* test_names,
+ const std::vector<std::string>& type_names =
+ GenerateNames<DefaultNameGenerator, Types>()) {
+ RegisterTypeParameterizedTestSuiteInstantiation(case_name);
+ std::string test_name =
+ StripTrailingSpaces(GetPrefixUntilComma(test_names));
+ if (!state->TestExists(test_name)) {
+ fprintf(stderr, "Failed to get code location for test %s.%s at %s.",
+ case_name, test_name.c_str(),
+ FormatFileLocation(code_location.file.c_str(), code_location.line)
+ .c_str());
+ fflush(stderr);
+ posix::Abort();
+ }
+ const CodeLocation& test_location = state->GetCodeLocation(test_name);
+
+ typedef typename Tests::Head Head;
+
+ // First, register the first test in 'Test' for each type in 'Types'.
+ TypeParameterizedTest<Fixture, Head, Types>::Register(
+ prefix, test_location, case_name, test_names, 0, type_names);
+
+ // Next, recurses (at compile time) with the tail of the test list.
+ return TypeParameterizedTestSuite<Fixture, typename Tests::Tail,
+ Types>::Register(prefix, code_location,
+ state, case_name,
+ SkipComma(test_names),
+ type_names);
+ }
+};
+
+// The base case for the compile time recursion.
+template <GTEST_TEMPLATE_ Fixture, typename Types>
+class TypeParameterizedTestSuite<Fixture, internal::None, Types> {
+ public:
+ static bool Register(const char* /*prefix*/, const CodeLocation&,
+ const TypedTestSuitePState* /*state*/,
+ const char* /*case_name*/, const char* /*test_names*/,
+ const std::vector<std::string>& =
+ std::vector<std::string>() /*type_names*/) {
+ return true;
+ }
+};
+
+// Returns the current OS stack trace as an std::string.
+//
+// The maximum number of stack frames to be included is specified by
+// the gtest_stack_trace_depth flag. The skip_count parameter
+// specifies the number of top frames to be skipped, which doesn't
+// count against the number of frames to be included.
+//
+// For example, if Foo() calls Bar(), which in turn calls
+// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
+// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
+GTEST_API_ std::string GetCurrentOsStackTraceExceptTop(UnitTest* unit_test,
+ int skip_count);
+
+// Helpers for suppressing warnings on unreachable code or constant
+// condition.
+
+// Always returns true.
+GTEST_API_ bool AlwaysTrue();
+
+// Always returns false.
+inline bool AlwaysFalse() { return !AlwaysTrue(); }
+
+// Helper for suppressing false warning from Clang on a const char*
+// variable declared in a conditional expression always being NULL in
+// the else branch.
+struct GTEST_API_ ConstCharPtr {
+ ConstCharPtr(const char* str) : value(str) {}
+ operator bool() const { return true; }
+ const char* value;
+};
+
+// Helper for declaring std::string within 'if' statement
+// in pre C++17 build environment.
+struct TrueWithString {
+ TrueWithString() = default;
+ explicit TrueWithString(const char* str) : value(str) {}
+ explicit TrueWithString(const std::string& str) : value(str) {}
+ explicit operator bool() const { return true; }
+ std::string value;
+};
+
+// A simple Linear Congruential Generator for generating random
+// numbers with a uniform distribution. Unlike rand() and srand(), it
+// doesn't use global state (and therefore can't interfere with user
+// code). Unlike rand_r(), it's portable. An LCG isn't very random,
+// but it's good enough for our purposes.
+class GTEST_API_ Random {
+ public:
+ static const uint32_t kMaxRange = 1u << 31;
+
+ explicit Random(uint32_t seed) : state_(seed) {}
+
+ void Reseed(uint32_t seed) { state_ = seed; }
+
+ // Generates a random number from [0, range). Crashes if 'range' is
+ // 0 or greater than kMaxRange.
+ uint32_t Generate(uint32_t range);
+
+ private:
+ uint32_t state_;
+ Random(const Random&) = delete;
+ Random& operator=(const Random&) = delete;
+};
+
+// Turns const U&, U&, const U, and U all into U.
+#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
+ typename std::remove_const<typename std::remove_reference<T>::type>::type
+
+// HasDebugStringAndShortDebugString<T>::value is a compile-time bool constant
+// that's true if and only if T has methods DebugString() and ShortDebugString()
+// that return std::string.
+template <typename T>
+class HasDebugStringAndShortDebugString {
+ private:
+ template <typename C>
+ static auto CheckDebugString(C*) -> typename std::is_same<
+ std::string, decltype(std::declval<const C>().DebugString())>::type;
+ template <typename>
+ static std::false_type CheckDebugString(...);
+
+ template <typename C>
+ static auto CheckShortDebugString(C*) -> typename std::is_same<
+ std::string, decltype(std::declval<const C>().ShortDebugString())>::type;
+ template <typename>
+ static std::false_type CheckShortDebugString(...);
+
+ using HasDebugStringType = decltype(CheckDebugString<T>(nullptr));
+ using HasShortDebugStringType = decltype(CheckShortDebugString<T>(nullptr));
+
+ public:
+ static constexpr bool value =
+ HasDebugStringType::value && HasShortDebugStringType::value;
+};
+
+template <typename T>
+constexpr bool HasDebugStringAndShortDebugString<T>::value;
+
+// When the compiler sees expression IsContainerTest<C>(0), if C is an
+// STL-style container class, the first overload of IsContainerTest
+// will be viable (since both C::iterator* and C::const_iterator* are
+// valid types and NULL can be implicitly converted to them). It will
+// be picked over the second overload as 'int' is a perfect match for
+// the type of argument 0. If C::iterator or C::const_iterator is not
+// a valid type, the first overload is not viable, and the second
+// overload will be picked. Therefore, we can determine whether C is
+// a container class by checking the type of IsContainerTest<C>(0).
+// The value of the expression is insignificant.
+//
+// In C++11 mode we check the existence of a const_iterator and that an
+// iterator is properly implemented for the container.
+//
+// For pre-C++11 that we look for both C::iterator and C::const_iterator.
+// The reason is that C++ injects the name of a class as a member of the
+// class itself (e.g. you can refer to class iterator as either
+// 'iterator' or 'iterator::iterator'). If we look for C::iterator
+// only, for example, we would mistakenly think that a class named
+// iterator is an STL container.
+//
+// Also note that the simpler approach of overloading
+// IsContainerTest(typename C::const_iterator*) and
+// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
+typedef int IsContainer;
+template <class C,
+ class Iterator = decltype(::std::declval<const C&>().begin()),
+ class = decltype(::std::declval<const C&>().end()),
+ class = decltype(++::std::declval<Iterator&>()),
+ class = decltype(*::std::declval<Iterator>()),
+ class = typename C::const_iterator>
+IsContainer IsContainerTest(int /* dummy */) {
+ return 0;
+}
+
+typedef char IsNotContainer;
+template <class C>
+IsNotContainer IsContainerTest(long /* dummy */) {
+ return '\0';
+}
+
+// Trait to detect whether a type T is a hash table.
+// The heuristic used is that the type contains an inner type `hasher` and does
+// not contain an inner type `reverse_iterator`.
+// If the container is iterable in reverse, then order might actually matter.
+template <typename T>
+struct IsHashTable {
+ private:
+ template <typename U>
+ static char test(typename U::hasher*, typename U::reverse_iterator*);
+ template <typename U>
+ static int test(typename U::hasher*, ...);
+ template <typename U>
+ static char test(...);
+
+ public:
+ static const bool value = sizeof(test<T>(nullptr, nullptr)) == sizeof(int);
+};
+
+template <typename T>
+const bool IsHashTable<T>::value;
+
+template <typename C,
+ bool = sizeof(IsContainerTest<C>(0)) == sizeof(IsContainer)>
+struct IsRecursiveContainerImpl;
+
+template <typename C>
+struct IsRecursiveContainerImpl<C, false> : public std::false_type {};
+
+// Since the IsRecursiveContainerImpl depends on the IsContainerTest we need to
+// obey the same inconsistencies as the IsContainerTest, namely check if
+// something is a container is relying on only const_iterator in C++11 and
+// is relying on both const_iterator and iterator otherwise
+template <typename C>
+struct IsRecursiveContainerImpl<C, true> {
+ using value_type = decltype(*std::declval<typename C::const_iterator>());
+ using type =
+ std::is_same<typename std::remove_const<
+ typename std::remove_reference<value_type>::type>::type,
+ C>;
+};
+
+// IsRecursiveContainer<Type> is a unary compile-time predicate that
+// evaluates whether C is a recursive container type. A recursive container
+// type is a container type whose value_type is equal to the container type
+// itself. An example for a recursive container type is
+// boost::filesystem::path, whose iterator has a value_type that is equal to
+// boost::filesystem::path.
+template <typename C>
+struct IsRecursiveContainer : public IsRecursiveContainerImpl<C>::type {};
+
+// Utilities for native arrays.
+
+// ArrayEq() compares two k-dimensional native arrays using the
+// elements' operator==, where k can be any integer >= 0. When k is
+// 0, ArrayEq() degenerates into comparing a single pair of values.
+
+template <typename T, typename U>
+bool ArrayEq(const T* lhs, size_t size, const U* rhs);
+
+// This generic version is used when k is 0.
+template <typename T, typename U>
+inline bool ArrayEq(const T& lhs, const U& rhs) {
+ return lhs == rhs;
+}
+
+// This overload is used when k >= 1.
+template <typename T, typename U, size_t N>
+inline bool ArrayEq(const T (&lhs)[N], const U (&rhs)[N]) {
+ return internal::ArrayEq(lhs, N, rhs);
+}
+
+// This helper reduces code bloat. If we instead put its logic inside
+// the previous ArrayEq() function, arrays with different sizes would
+// lead to different copies of the template code.
+template <typename T, typename U>
+bool ArrayEq(const T* lhs, size_t size, const U* rhs) {
+ for (size_t i = 0; i != size; i++) {
+ if (!internal::ArrayEq(lhs[i], rhs[i])) return false;
+ }
+ return true;
+}
+
+// Finds the first element in the iterator range [begin, end) that
+// equals elem. Element may be a native array type itself.
+template <typename Iter, typename Element>
+Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) {
+ for (Iter it = begin; it != end; ++it) {
+ if (internal::ArrayEq(*it, elem)) return it;
+ }
+ return end;
+}
+
+// CopyArray() copies a k-dimensional native array using the elements'
+// operator=, where k can be any integer >= 0. When k is 0,
+// CopyArray() degenerates into copying a single value.
+
+template <typename T, typename U>
+void CopyArray(const T* from, size_t size, U* to);
+
+// This generic version is used when k is 0.
+template <typename T, typename U>
+inline void CopyArray(const T& from, U* to) {
+ *to = from;
+}
+
+// This overload is used when k >= 1.
+template <typename T, typename U, size_t N>
+inline void CopyArray(const T (&from)[N], U (*to)[N]) {
+ internal::CopyArray(from, N, *to);
+}
+
+// This helper reduces code bloat. If we instead put its logic inside
+// the previous CopyArray() function, arrays with different sizes
+// would lead to different copies of the template code.
+template <typename T, typename U>
+void CopyArray(const T* from, size_t size, U* to) {
+ for (size_t i = 0; i != size; i++) {
+ internal::CopyArray(from[i], to + i);
+ }
+}
+
+// The relation between an NativeArray object (see below) and the
+// native array it represents.
+// We use 2 different structs to allow non-copyable types to be used, as long
+// as RelationToSourceReference() is passed.
+struct RelationToSourceReference {};
+struct RelationToSourceCopy {};
+
+// Adapts a native array to a read-only STL-style container. Instead
+// of the complete STL container concept, this adaptor only implements
+// members useful for Google Mock's container matchers. New members
+// should be added as needed. To simplify the implementation, we only
+// support Element being a raw type (i.e. having no top-level const or
+// reference modifier). It's the client's responsibility to satisfy
+// this requirement. Element can be an array type itself (hence
+// multi-dimensional arrays are supported).
+template <typename Element>
+class NativeArray {
+ public:
+ // STL-style container typedefs.
+ typedef Element value_type;
+ typedef Element* iterator;
+ typedef const Element* const_iterator;
+
+ // Constructs from a native array. References the source.
+ NativeArray(const Element* array, size_t count, RelationToSourceReference) {
+ InitRef(array, count);
+ }
+
+ // Constructs from a native array. Copies the source.
+ NativeArray(const Element* array, size_t count, RelationToSourceCopy) {
+ InitCopy(array, count);
+ }
+
+ // Copy constructor.
+ NativeArray(const NativeArray& rhs) {
+ (this->*rhs.clone_)(rhs.array_, rhs.size_);
+ }
+
+ ~NativeArray() {
+ if (clone_ != &NativeArray::InitRef) delete[] array_;
+ }
+
+ // STL-style container methods.
+ size_t size() const { return size_; }
+ const_iterator begin() const { return array_; }
+ const_iterator end() const { return array_ + size_; }
+ bool operator==(const NativeArray& rhs) const {
+ return size() == rhs.size() && ArrayEq(begin(), size(), rhs.begin());
+ }
+
+ private:
+ static_assert(!std::is_const<Element>::value, "Type must not be const");
+ static_assert(!std::is_reference<Element>::value,
+ "Type must not be a reference");
+
+ // Initializes this object with a copy of the input.
+ void InitCopy(const Element* array, size_t a_size) {
+ Element* const copy = new Element[a_size];
+ CopyArray(array, a_size, copy);
+ array_ = copy;
+ size_ = a_size;
+ clone_ = &NativeArray::InitCopy;
+ }
+
+ // Initializes this object with a reference of the input.
+ void InitRef(const Element* array, size_t a_size) {
+ array_ = array;
+ size_ = a_size;
+ clone_ = &NativeArray::InitRef;
+ }
+
+ const Element* array_;
+ size_t size_;
+ void (NativeArray::*clone_)(const Element*, size_t);
+};
+
+// Backport of std::index_sequence.
+template <size_t... Is>
+struct IndexSequence {
+ using type = IndexSequence;
+};
+
+// Double the IndexSequence, and one if plus_one is true.
+template <bool plus_one, typename T, size_t sizeofT>
+struct DoubleSequence;
+template <size_t... I, size_t sizeofT>
+struct DoubleSequence<true, IndexSequence<I...>, sizeofT> {
+ using type = IndexSequence<I..., (sizeofT + I)..., 2 * sizeofT>;
+};
+template <size_t... I, size_t sizeofT>
+struct DoubleSequence<false, IndexSequence<I...>, sizeofT> {
+ using type = IndexSequence<I..., (sizeofT + I)...>;
+};
+
+// Backport of std::make_index_sequence.
+// It uses O(ln(N)) instantiation depth.
+template <size_t N>
+struct MakeIndexSequenceImpl
+ : DoubleSequence<N % 2 == 1, typename MakeIndexSequenceImpl<N / 2>::type,
+ N / 2>::type {};
+
+template <>
+struct MakeIndexSequenceImpl<0> : IndexSequence<> {};
+
+template <size_t N>
+using MakeIndexSequence = typename MakeIndexSequenceImpl<N>::type;
+
+template <typename... T>
+using IndexSequenceFor = typename MakeIndexSequence<sizeof...(T)>::type;
+
+template <size_t>
+struct Ignore {
+ Ignore(...); // NOLINT
+};
+
+template <typename>
+struct ElemFromListImpl;
+template <size_t... I>
+struct ElemFromListImpl<IndexSequence<I...>> {
+ // We make Ignore a template to solve a problem with MSVC.
+ // A non-template Ignore would work fine with `decltype(Ignore(I))...`, but
+ // MSVC doesn't understand how to deal with that pack expansion.
+ // Use `0 * I` to have a single instantiation of Ignore.
+ template <typename R>
+ static R Apply(Ignore<0 * I>..., R (*)(), ...);
+};
+
+template <size_t N, typename... T>
+struct ElemFromList {
+ using type =
+ decltype(ElemFromListImpl<typename MakeIndexSequence<N>::type>::Apply(
+ static_cast<T (*)()>(nullptr)...));
+};
+
+struct FlatTupleConstructTag {};
+
+template <typename... T>
+class FlatTuple;
+
+template <typename Derived, size_t I>
+struct FlatTupleElemBase;
+
+template <typename... T, size_t I>
+struct FlatTupleElemBase<FlatTuple<T...>, I> {
+ using value_type = typename ElemFromList<I, T...>::type;
+ FlatTupleElemBase() = default;
+ template <typename Arg>
+ explicit FlatTupleElemBase(FlatTupleConstructTag, Arg&& t)
+ : value(std::forward<Arg>(t)) {}
+ value_type value;
+};
+
+template <typename Derived, typename Idx>
+struct FlatTupleBase;
+
+template <size_t... Idx, typename... T>
+struct FlatTupleBase<FlatTuple<T...>, IndexSequence<Idx...>>
+ : FlatTupleElemBase<FlatTuple<T...>, Idx>... {
+ using Indices = IndexSequence<Idx...>;
+ FlatTupleBase() = default;
+ template <typename... Args>
+ explicit FlatTupleBase(FlatTupleConstructTag, Args&&... args)
+ : FlatTupleElemBase<FlatTuple<T...>, Idx>(FlatTupleConstructTag{},
+ std::forward<Args>(args))... {}
+
+ template <size_t I>
+ const typename ElemFromList<I, T...>::type& Get() const {
+ return FlatTupleElemBase<FlatTuple<T...>, I>::value;
+ }
+
+ template <size_t I>
+ typename ElemFromList<I, T...>::type& Get() {
+ return FlatTupleElemBase<FlatTuple<T...>, I>::value;
+ }
+
+ template <typename F>
+ auto Apply(F&& f) -> decltype(std::forward<F>(f)(this->Get<Idx>()...)) {
+ return std::forward<F>(f)(Get<Idx>()...);
+ }
+
+ template <typename F>
+ auto Apply(F&& f) const -> decltype(std::forward<F>(f)(this->Get<Idx>()...)) {
+ return std::forward<F>(f)(Get<Idx>()...);
+ }
+};
+
+// Analog to std::tuple but with different tradeoffs.
+// This class minimizes the template instantiation depth, thus allowing more
+// elements than std::tuple would. std::tuple has been seen to require an
+// instantiation depth of more than 10x the number of elements in some
+// implementations.
+// FlatTuple and ElemFromList are not recursive and have a fixed depth
+// regardless of T...
+// MakeIndexSequence, on the other hand, it is recursive but with an
+// instantiation depth of O(ln(N)).
+template <typename... T>
+class FlatTuple
+ : private FlatTupleBase<FlatTuple<T...>,
+ typename MakeIndexSequence<sizeof...(T)>::type> {
+ using Indices = typename FlatTupleBase<
+ FlatTuple<T...>, typename MakeIndexSequence<sizeof...(T)>::type>::Indices;
+
+ public:
+ FlatTuple() = default;
+ template <typename... Args>
+ explicit FlatTuple(FlatTupleConstructTag tag, Args&&... args)
+ : FlatTuple::FlatTupleBase(tag, std::forward<Args>(args)...) {}
+
+ using FlatTuple::FlatTupleBase::Apply;
+ using FlatTuple::FlatTupleBase::Get;
+};
+
+// Utility functions to be called with static_assert to induce deprecation
+// warnings.
+GTEST_INTERNAL_DEPRECATED(
+ "INSTANTIATE_TEST_CASE_P is deprecated, please use "
+ "INSTANTIATE_TEST_SUITE_P")
+constexpr bool InstantiateTestCase_P_IsDeprecated() { return true; }
+
+GTEST_INTERNAL_DEPRECATED(
+ "TYPED_TEST_CASE_P is deprecated, please use "
+ "TYPED_TEST_SUITE_P")
+constexpr bool TypedTestCase_P_IsDeprecated() { return true; }
+
+GTEST_INTERNAL_DEPRECATED(
+ "TYPED_TEST_CASE is deprecated, please use "
+ "TYPED_TEST_SUITE")
+constexpr bool TypedTestCaseIsDeprecated() { return true; }
+
+GTEST_INTERNAL_DEPRECATED(
+ "REGISTER_TYPED_TEST_CASE_P is deprecated, please use "
+ "REGISTER_TYPED_TEST_SUITE_P")
+constexpr bool RegisterTypedTestCase_P_IsDeprecated() { return true; }
+
+GTEST_INTERNAL_DEPRECATED(
+ "INSTANTIATE_TYPED_TEST_CASE_P is deprecated, please use "
+ "INSTANTIATE_TYPED_TEST_SUITE_P")
+constexpr bool InstantiateTypedTestCase_P_IsDeprecated() { return true; }
+
+} // namespace internal
+} // namespace testing
+
+namespace std {
+// Some standard library implementations use `struct tuple_size` and some use
+// `class tuple_size`. Clang warns about the mismatch.
+// https://reviews.llvm.org/D55466
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmismatched-tags"
+#endif
+template <typename... Ts>
+struct tuple_size<testing::internal::FlatTuple<Ts...>>
+ : std::integral_constant<size_t, sizeof...(Ts)> {};
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+} // namespace std
+
+#define GTEST_MESSAGE_AT_(file, line, message, result_type) \
+ ::testing::internal::AssertHelper(result_type, file, line, message) = \
+ ::testing::Message()
+
+#define GTEST_MESSAGE_(message, result_type) \
+ GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type)
+
+#define GTEST_FATAL_FAILURE_(message) \
+ return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure)
+
+#define GTEST_NONFATAL_FAILURE_(message) \
+ GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure)
+
+#define GTEST_SUCCESS_(message) \
+ GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess)
+
+#define GTEST_SKIP_(message) \
+ return GTEST_MESSAGE_(message, ::testing::TestPartResult::kSkip)
+
+// Suppress MSVC warning 4072 (unreachable code) for the code following
+// statement if it returns or throws (or doesn't return or throw in some
+// situations).
+// NOTE: The "else" is important to keep this expansion to prevent a top-level
+// "else" from attaching to our "if".
+#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \
+ if (::testing::internal::AlwaysTrue()) { \
+ statement; \
+ } else /* NOLINT */ \
+ static_assert(true, "") // User must have a semicolon after expansion.
+
+#if GTEST_HAS_EXCEPTIONS
+
+namespace testing {
+namespace internal {
+
+class NeverThrown {
+ public:
+ const char* what() const noexcept {
+ return "this exception should never be thrown";
+ }
+};
+
+} // namespace internal
+} // namespace testing
+
+#if GTEST_HAS_RTTI
+
+#define GTEST_EXCEPTION_TYPE_(e) ::testing::internal::GetTypeName(typeid(e))
+
+#else // GTEST_HAS_RTTI
+
+#define GTEST_EXCEPTION_TYPE_(e) \
+ std::string { "an std::exception-derived error" }
+
+#endif // GTEST_HAS_RTTI
+
+#define GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception) \
+ catch (typename std::conditional< \
+ std::is_same<typename std::remove_cv<typename std::remove_reference< \
+ expected_exception>::type>::type, \
+ std::exception>::value, \
+ const ::testing::internal::NeverThrown&, const std::exception&>::type \
+ e) { \
+ gtest_msg.value = "Expected: " #statement \
+ " throws an exception of type " #expected_exception \
+ ".\n Actual: it throws "; \
+ gtest_msg.value += GTEST_EXCEPTION_TYPE_(e); \
+ gtest_msg.value += " with description \""; \
+ gtest_msg.value += e.what(); \
+ gtest_msg.value += "\"."; \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
+ }
+
+#else // GTEST_HAS_EXCEPTIONS
+
+#define GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception)
+
+#endif // GTEST_HAS_EXCEPTIONS
+
+#define GTEST_TEST_THROW_(statement, expected_exception, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::TrueWithString gtest_msg{}) { \
+ bool gtest_caught_expected = false; \
+ try { \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ } catch (expected_exception const&) { \
+ gtest_caught_expected = true; \
+ } \
+ GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception) \
+ catch (...) { \
+ gtest_msg.value = "Expected: " #statement \
+ " throws an exception of type " #expected_exception \
+ ".\n Actual: it throws a different type."; \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
+ } \
+ if (!gtest_caught_expected) { \
+ gtest_msg.value = "Expected: " #statement \
+ " throws an exception of type " #expected_exception \
+ ".\n Actual: it throws nothing."; \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
+ } \
+ } else /*NOLINT*/ \
+ GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__) \
+ : fail(gtest_msg.value.c_str())
+
+#if GTEST_HAS_EXCEPTIONS
+
+#define GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_() \
+ catch (std::exception const& e) { \
+ gtest_msg.value = "it throws "; \
+ gtest_msg.value += GTEST_EXCEPTION_TYPE_(e); \
+ gtest_msg.value += " with description \""; \
+ gtest_msg.value += e.what(); \
+ gtest_msg.value += "\"."; \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
+ }
+
+#else // GTEST_HAS_EXCEPTIONS
+
+#define GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_()
+
+#endif // GTEST_HAS_EXCEPTIONS
+
+#define GTEST_TEST_NO_THROW_(statement, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::TrueWithString gtest_msg{}) { \
+ try { \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ } \
+ GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_() \
+ catch (...) { \
+ gtest_msg.value = "it throws."; \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
+ } \
+ } else \
+ GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__) \
+ : fail(("Expected: " #statement " doesn't throw an exception.\n" \
+ " Actual: " + \
+ gtest_msg.value) \
+ .c_str())
+
+#define GTEST_TEST_ANY_THROW_(statement, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ bool gtest_caught_any = false; \
+ try { \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ } catch (...) { \
+ gtest_caught_any = true; \
+ } \
+ if (!gtest_caught_any) { \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \
+ } \
+ } else \
+ GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__) \
+ : fail("Expected: " #statement \
+ " throws an exception.\n" \
+ " Actual: it doesn't.")
+
+// Implements Boolean test assertions such as EXPECT_TRUE. expression can be
+// either a boolean expression or an AssertionResult. text is a textual
+// representation of expression as it was passed into the EXPECT_TRUE.
+#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (const ::testing::AssertionResult gtest_ar_ = \
+ ::testing::AssertionResult(expression)) \
+ ; \
+ else \
+ fail(::testing::internal::GetBoolAssertionFailureMessage( \
+ gtest_ar_, text, #actual, #expected) \
+ .c_str())
+
+#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \
+ } \
+ } else \
+ GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__) \
+ : fail("Expected: " #statement \
+ " doesn't generate new fatal " \
+ "failures in the current thread.\n" \
+ " Actual: it does.")
+
+// Expands to the name of the class that implements the given test.
+#define GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+ test_suite_name##_##test_name##_Test
+
+// Helper macro for defining tests.
+#define GTEST_TEST_(test_suite_name, test_name, parent_class, parent_id) \
+ static_assert(sizeof(GTEST_STRINGIFY_(test_suite_name)) > 1, \
+ "test_suite_name must not be empty"); \
+ static_assert(sizeof(GTEST_STRINGIFY_(test_name)) > 1, \
+ "test_name must not be empty"); \
+ class GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+ : public parent_class { \
+ public: \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() = default; \
+ ~GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() override = default; \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+ (const GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) &) = delete; \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) & operator=( \
+ const GTEST_TEST_CLASS_NAME_(test_suite_name, \
+ test_name) &) = delete; /* NOLINT */ \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+ (GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) &&) noexcept = delete; \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) & operator=( \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, \
+ test_name) &&) noexcept = delete; /* NOLINT */ \
+ \
+ private: \
+ void TestBody() override; \
+ static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_; \
+ }; \
+ \
+ ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_suite_name, \
+ test_name)::test_info_ = \
+ ::testing::internal::MakeAndRegisterTestInfo( \
+ #test_suite_name, #test_name, nullptr, nullptr, \
+ ::testing::internal::CodeLocation(__FILE__, __LINE__), (parent_id), \
+ ::testing::internal::SuiteApiResolver< \
+ parent_class>::GetSetUpCaseOrSuite(__FILE__, __LINE__), \
+ ::testing::internal::SuiteApiResolver< \
+ parent_class>::GetTearDownCaseOrSuite(__FILE__, __LINE__), \
+ new ::testing::internal::TestFactoryImpl<GTEST_TEST_CLASS_NAME_( \
+ test_suite_name, test_name)>); \
+ void GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)::TestBody()
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util.h
new file mode 100644
index 0000000000..e7af2f904a
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util.h
@@ -0,0 +1,956 @@
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Type and function utilities for implementing parameterized tests.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+
+#include <ctype.h>
+
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <set>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest-printers.h"
+#include "gtest/gtest-test-part.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+// Input to a parameterized test name generator, describing a test parameter.
+// Consists of the parameter value and the integer parameter index.
+template <class ParamType>
+struct TestParamInfo {
+ TestParamInfo(const ParamType& a_param, size_t an_index)
+ : param(a_param), index(an_index) {}
+ ParamType param;
+ size_t index;
+};
+
+// A builtin parameterized test name generator which returns the result of
+// testing::PrintToString.
+struct PrintToStringParamName {
+ template <class ParamType>
+ std::string operator()(const TestParamInfo<ParamType>& info) const {
+ return PrintToString(info.param);
+ }
+};
+
+namespace internal {
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// Utility Functions
+
+// Outputs a message explaining invalid registration of different
+// fixture class for the same test suite. This may happen when
+// TEST_P macro is used to define two tests with the same name
+// but in different namespaces.
+GTEST_API_ void ReportInvalidTestSuiteType(const char* test_suite_name,
+ CodeLocation code_location);
+
+template <typename>
+class ParamGeneratorInterface;
+template <typename>
+class ParamGenerator;
+
+// Interface for iterating over elements provided by an implementation
+// of ParamGeneratorInterface<T>.
+template <typename T>
+class ParamIteratorInterface {
+ public:
+ virtual ~ParamIteratorInterface() {}
+ // A pointer to the base generator instance.
+ // Used only for the purposes of iterator comparison
+ // to make sure that two iterators belong to the same generator.
+ virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
+ // Advances iterator to point to the next element
+ // provided by the generator. The caller is responsible
+ // for not calling Advance() on an iterator equal to
+ // BaseGenerator()->End().
+ virtual void Advance() = 0;
+ // Clones the iterator object. Used for implementing copy semantics
+ // of ParamIterator<T>.
+ virtual ParamIteratorInterface* Clone() const = 0;
+ // Dereferences the current iterator and provides (read-only) access
+ // to the pointed value. It is the caller's responsibility not to call
+ // Current() on an iterator equal to BaseGenerator()->End().
+ // Used for implementing ParamGenerator<T>::operator*().
+ virtual const T* Current() const = 0;
+ // Determines whether the given iterator and other point to the same
+ // element in the sequence generated by the generator.
+ // Used for implementing ParamGenerator<T>::operator==().
+ virtual bool Equals(const ParamIteratorInterface& other) const = 0;
+};
+
+// Class iterating over elements provided by an implementation of
+// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
+// and implements the const forward iterator concept.
+template <typename T>
+class ParamIterator {
+ public:
+ typedef T value_type;
+ typedef const T& reference;
+ typedef ptrdiff_t difference_type;
+
+ // ParamIterator assumes ownership of the impl_ pointer.
+ ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
+ ParamIterator& operator=(const ParamIterator& other) {
+ if (this != &other) impl_.reset(other.impl_->Clone());
+ return *this;
+ }
+
+ const T& operator*() const { return *impl_->Current(); }
+ const T* operator->() const { return impl_->Current(); }
+ // Prefix version of operator++.
+ ParamIterator& operator++() {
+ impl_->Advance();
+ return *this;
+ }
+ // Postfix version of operator++.
+ ParamIterator operator++(int /*unused*/) {
+ ParamIteratorInterface<T>* clone = impl_->Clone();
+ impl_->Advance();
+ return ParamIterator(clone);
+ }
+ bool operator==(const ParamIterator& other) const {
+ return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
+ }
+ bool operator!=(const ParamIterator& other) const {
+ return !(*this == other);
+ }
+
+ private:
+ friend class ParamGenerator<T>;
+ explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
+ std::unique_ptr<ParamIteratorInterface<T>> impl_;
+};
+
+// ParamGeneratorInterface<T> is the binary interface to access generators
+// defined in other translation units.
+template <typename T>
+class ParamGeneratorInterface {
+ public:
+ typedef T ParamType;
+
+ virtual ~ParamGeneratorInterface() {}
+
+ // Generator interface definition
+ virtual ParamIteratorInterface<T>* Begin() const = 0;
+ virtual ParamIteratorInterface<T>* End() const = 0;
+};
+
+// Wraps ParamGeneratorInterface<T> and provides general generator syntax
+// compatible with the STL Container concept.
+// This class implements copy initialization semantics and the contained
+// ParamGeneratorInterface<T> instance is shared among all copies
+// of the original object. This is possible because that instance is immutable.
+template <typename T>
+class ParamGenerator {
+ public:
+ typedef ParamIterator<T> iterator;
+
+ explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
+ ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}
+
+ ParamGenerator& operator=(const ParamGenerator& other) {
+ impl_ = other.impl_;
+ return *this;
+ }
+
+ iterator begin() const { return iterator(impl_->Begin()); }
+ iterator end() const { return iterator(impl_->End()); }
+
+ private:
+ std::shared_ptr<const ParamGeneratorInterface<T>> impl_;
+};
+
+// Generates values from a range of two comparable values. Can be used to
+// generate sequences of user-defined types that implement operator+() and
+// operator<().
+// This class is used in the Range() function.
+template <typename T, typename IncrementT>
+class RangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+ RangeGenerator(T begin, T end, IncrementT step)
+ : begin_(begin),
+ end_(end),
+ step_(step),
+ end_index_(CalculateEndIndex(begin, end, step)) {}
+ ~RangeGenerator() override {}
+
+ ParamIteratorInterface<T>* Begin() const override {
+ return new Iterator(this, begin_, 0, step_);
+ }
+ ParamIteratorInterface<T>* End() const override {
+ return new Iterator(this, end_, end_index_, step_);
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<T> {
+ public:
+ Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
+ IncrementT step)
+ : base_(base), value_(value), index_(index), step_(step) {}
+ ~Iterator() override {}
+
+ const ParamGeneratorInterface<T>* BaseGenerator() const override {
+ return base_;
+ }
+ void Advance() override {
+ value_ = static_cast<T>(value_ + step_);
+ index_++;
+ }
+ ParamIteratorInterface<T>* Clone() const override {
+ return new Iterator(*this);
+ }
+ const T* Current() const override { return &value_; }
+ bool Equals(const ParamIteratorInterface<T>& other) const override {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const int other_index =
+ CheckedDowncastToActualType<const Iterator>(&other)->index_;
+ return index_ == other_index;
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : ParamIteratorInterface<T>(),
+ base_(other.base_),
+ value_(other.value_),
+ index_(other.index_),
+ step_(other.step_) {}
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<T>* const base_;
+ T value_;
+ int index_;
+ const IncrementT step_;
+ }; // class RangeGenerator::Iterator
+
+ static int CalculateEndIndex(const T& begin, const T& end,
+ const IncrementT& step) {
+ int end_index = 0;
+ for (T i = begin; i < end; i = static_cast<T>(i + step)) end_index++;
+ return end_index;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const RangeGenerator& other);
+
+ const T begin_;
+ const T end_;
+ const IncrementT step_;
+ // The index for the end() iterator. All the elements in the generated
+ // sequence are indexed (0-based) to aid iterator comparison.
+ const int end_index_;
+}; // class RangeGenerator
+
+// Generates values from a pair of STL-style iterators. Used in the
+// ValuesIn() function. The elements are copied from the source range
+// since the source can be located on the stack, and the generator
+// is likely to persist beyond that stack frame.
+template <typename T>
+class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+ template <typename ForwardIterator>
+ ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
+ : container_(begin, end) {}
+ ~ValuesInIteratorRangeGenerator() override {}
+
+ ParamIteratorInterface<T>* Begin() const override {
+ return new Iterator(this, container_.begin());
+ }
+ ParamIteratorInterface<T>* End() const override {
+ return new Iterator(this, container_.end());
+ }
+
+ private:
+ typedef typename ::std::vector<T> ContainerType;
+
+ class Iterator : public ParamIteratorInterface<T> {
+ public:
+ Iterator(const ParamGeneratorInterface<T>* base,
+ typename ContainerType::const_iterator iterator)
+ : base_(base), iterator_(iterator) {}
+ ~Iterator() override {}
+
+ const ParamGeneratorInterface<T>* BaseGenerator() const override {
+ return base_;
+ }
+ void Advance() override {
+ ++iterator_;
+ value_.reset();
+ }
+ ParamIteratorInterface<T>* Clone() const override {
+ return new Iterator(*this);
+ }
+ // We need to use cached value referenced by iterator_ because *iterator_
+ // can return a temporary object (and of type other then T), so just
+ // having "return &*iterator_;" doesn't work.
+ // value_ is updated here and not in Advance() because Advance()
+ // can advance iterator_ beyond the end of the range, and we cannot
+ // detect that fact. The client code, on the other hand, is
+ // responsible for not calling Current() on an out-of-range iterator.
+ const T* Current() const override {
+ if (value_.get() == nullptr) value_.reset(new T(*iterator_));
+ return value_.get();
+ }
+ bool Equals(const ParamIteratorInterface<T>& other) const override {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ return iterator_ ==
+ CheckedDowncastToActualType<const Iterator>(&other)->iterator_;
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ // The explicit constructor call suppresses a false warning
+ // emitted by gcc when supplied with the -Wextra option.
+ : ParamIteratorInterface<T>(),
+ base_(other.base_),
+ iterator_(other.iterator_) {}
+
+ const ParamGeneratorInterface<T>* const base_;
+ typename ContainerType::const_iterator iterator_;
+ // A cached value of *iterator_. We keep it here to allow access by
+ // pointer in the wrapping iterator's operator->().
+ // value_ needs to be mutable to be accessed in Current().
+ // Use of std::unique_ptr helps manage cached value's lifetime,
+ // which is bound by the lifespan of the iterator itself.
+ mutable std::unique_ptr<const T> value_;
+ }; // class ValuesInIteratorRangeGenerator::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const ValuesInIteratorRangeGenerator& other);
+
+ const ContainerType container_;
+}; // class ValuesInIteratorRangeGenerator
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Default parameterized test name generator, returns a string containing the
+// integer test parameter index.
+template <class ParamType>
+std::string DefaultParamName(const TestParamInfo<ParamType>& info) {
+ Message name_stream;
+ name_stream << info.index;
+ return name_stream.GetString();
+}
+
+template <typename T = int>
+void TestNotEmpty() {
+ static_assert(sizeof(T) == 0, "Empty arguments are not allowed.");
+}
+template <typename T = int>
+void TestNotEmpty(const T&) {}
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Stores a parameter value and later creates tests parameterized with that
+// value.
+template <class TestClass>
+class ParameterizedTestFactory : public TestFactoryBase {
+ public:
+ typedef typename TestClass::ParamType ParamType;
+ explicit ParameterizedTestFactory(ParamType parameter)
+ : parameter_(parameter) {}
+ Test* CreateTest() override {
+ TestClass::SetParam(&parameter_);
+ return new TestClass();
+ }
+
+ private:
+ const ParamType parameter_;
+
+ ParameterizedTestFactory(const ParameterizedTestFactory&) = delete;
+ ParameterizedTestFactory& operator=(const ParameterizedTestFactory&) = delete;
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactoryBase is a base class for meta-factories that create
+// test factories for passing into MakeAndRegisterTestInfo function.
+template <class ParamType>
+class TestMetaFactoryBase {
+ public:
+ virtual ~TestMetaFactoryBase() {}
+
+ virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactory creates test factories for passing into
+// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives
+// ownership of test factory pointer, same factory object cannot be passed
+// into that method twice. But ParameterizedTestSuiteInfo is going to call
+// it for each Test/Parameter value combination. Thus it needs meta factory
+// creator class.
+template <class TestSuite>
+class TestMetaFactory
+ : public TestMetaFactoryBase<typename TestSuite::ParamType> {
+ public:
+ using ParamType = typename TestSuite::ParamType;
+
+ TestMetaFactory() {}
+
+ TestFactoryBase* CreateTestFactory(ParamType parameter) override {
+ return new ParameterizedTestFactory<TestSuite>(parameter);
+ }
+
+ private:
+ TestMetaFactory(const TestMetaFactory&) = delete;
+ TestMetaFactory& operator=(const TestMetaFactory&) = delete;
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestSuiteInfoBase is a generic interface
+// to ParameterizedTestSuiteInfo classes. ParameterizedTestSuiteInfoBase
+// accumulates test information provided by TEST_P macro invocations
+// and generators provided by INSTANTIATE_TEST_SUITE_P macro invocations
+// and uses that information to register all resulting test instances
+// in RegisterTests method. The ParameterizeTestSuiteRegistry class holds
+// a collection of pointers to the ParameterizedTestSuiteInfo objects
+// and calls RegisterTests() on each of them when asked.
+class ParameterizedTestSuiteInfoBase {
+ public:
+ virtual ~ParameterizedTestSuiteInfoBase() {}
+
+ // Base part of test suite name for display purposes.
+ virtual const std::string& GetTestSuiteName() const = 0;
+ // Test suite id to verify identity.
+ virtual TypeId GetTestSuiteTypeId() const = 0;
+ // UnitTest class invokes this method to register tests in this
+ // test suite right before running them in RUN_ALL_TESTS macro.
+ // This method should not be called more than once on any single
+ // instance of a ParameterizedTestSuiteInfoBase derived class.
+ virtual void RegisterTests() = 0;
+
+ protected:
+ ParameterizedTestSuiteInfoBase() {}
+
+ private:
+ ParameterizedTestSuiteInfoBase(const ParameterizedTestSuiteInfoBase&) =
+ delete;
+ ParameterizedTestSuiteInfoBase& operator=(
+ const ParameterizedTestSuiteInfoBase&) = delete;
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Report a the name of a test_suit as safe to ignore
+// as the side effect of construction of this type.
+struct GTEST_API_ MarkAsIgnored {
+ explicit MarkAsIgnored(const char* test_suite);
+};
+
+GTEST_API_ void InsertSyntheticTestCase(const std::string& name,
+ CodeLocation location, bool has_test_p);
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestSuiteInfo accumulates tests obtained from TEST_P
+// macro invocations for a particular test suite and generators
+// obtained from INSTANTIATE_TEST_SUITE_P macro invocations for that
+// test suite. It registers tests with all values generated by all
+// generators when asked.
+template <class TestSuite>
+class ParameterizedTestSuiteInfo : public ParameterizedTestSuiteInfoBase {
+ public:
+ // ParamType and GeneratorCreationFunc are private types but are required
+ // for declarations of public methods AddTestPattern() and
+ // AddTestSuiteInstantiation().
+ using ParamType = typename TestSuite::ParamType;
+ // A function that returns an instance of appropriate generator type.
+ typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
+ using ParamNameGeneratorFunc = std::string(const TestParamInfo<ParamType>&);
+
+ explicit ParameterizedTestSuiteInfo(const char* name,
+ CodeLocation code_location)
+ : test_suite_name_(name), code_location_(code_location) {}
+
+ // Test suite base name for display purposes.
+ const std::string& GetTestSuiteName() const override {
+ return test_suite_name_;
+ }
+ // Test suite id to verify identity.
+ TypeId GetTestSuiteTypeId() const override { return GetTypeId<TestSuite>(); }
+ // TEST_P macro uses AddTestPattern() to record information
+ // about a single test in a LocalTestInfo structure.
+ // test_suite_name is the base name of the test suite (without invocation
+ // prefix). test_base_name is the name of an individual test without
+ // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
+ // test suite base name and DoBar is test base name.
+ void AddTestPattern(const char* test_suite_name, const char* test_base_name,
+ TestMetaFactoryBase<ParamType>* meta_factory,
+ CodeLocation code_location) {
+ tests_.push_back(std::shared_ptr<TestInfo>(new TestInfo(
+ test_suite_name, test_base_name, meta_factory, code_location)));
+ }
+ // INSTANTIATE_TEST_SUITE_P macro uses AddGenerator() to record information
+ // about a generator.
+ int AddTestSuiteInstantiation(const std::string& instantiation_name,
+ GeneratorCreationFunc* func,
+ ParamNameGeneratorFunc* name_func,
+ const char* file, int line) {
+ instantiations_.push_back(
+ InstantiationInfo(instantiation_name, func, name_func, file, line));
+ return 0; // Return value used only to run this method in namespace scope.
+ }
+ // UnitTest class invokes this method to register tests in this test suite
+ // right before running tests in RUN_ALL_TESTS macro.
+ // This method should not be called more than once on any single
+ // instance of a ParameterizedTestSuiteInfoBase derived class.
+ // UnitTest has a guard to prevent from calling this method more than once.
+ void RegisterTests() override {
+ bool generated_instantiations = false;
+
+ for (typename TestInfoContainer::iterator test_it = tests_.begin();
+ test_it != tests_.end(); ++test_it) {
+ std::shared_ptr<TestInfo> test_info = *test_it;
+ for (typename InstantiationContainer::iterator gen_it =
+ instantiations_.begin();
+ gen_it != instantiations_.end(); ++gen_it) {
+ const std::string& instantiation_name = gen_it->name;
+ ParamGenerator<ParamType> generator((*gen_it->generator)());
+ ParamNameGeneratorFunc* name_func = gen_it->name_func;
+ const char* file = gen_it->file;
+ int line = gen_it->line;
+
+ std::string test_suite_name;
+ if (!instantiation_name.empty())
+ test_suite_name = instantiation_name + "/";
+ test_suite_name += test_info->test_suite_base_name;
+
+ size_t i = 0;
+ std::set<std::string> test_param_names;
+ for (typename ParamGenerator<ParamType>::iterator param_it =
+ generator.begin();
+ param_it != generator.end(); ++param_it, ++i) {
+ generated_instantiations = true;
+
+ Message test_name_stream;
+
+ std::string param_name =
+ name_func(TestParamInfo<ParamType>(*param_it, i));
+
+ GTEST_CHECK_(IsValidParamName(param_name))
+ << "Parameterized test name '" << param_name
+ << "' is invalid, in " << file << " line " << line << std::endl;
+
+ GTEST_CHECK_(test_param_names.count(param_name) == 0)
+ << "Duplicate parameterized test name '" << param_name << "', in "
+ << file << " line " << line << std::endl;
+
+ test_param_names.insert(param_name);
+
+ if (!test_info->test_base_name.empty()) {
+ test_name_stream << test_info->test_base_name << "/";
+ }
+ test_name_stream << param_name;
+ MakeAndRegisterTestInfo(
+ test_suite_name.c_str(), test_name_stream.GetString().c_str(),
+ nullptr, // No type parameter.
+ PrintToString(*param_it).c_str(), test_info->code_location,
+ GetTestSuiteTypeId(),
+ SuiteApiResolver<TestSuite>::GetSetUpCaseOrSuite(file, line),
+ SuiteApiResolver<TestSuite>::GetTearDownCaseOrSuite(file, line),
+ test_info->test_meta_factory->CreateTestFactory(*param_it));
+ } // for param_it
+ } // for gen_it
+ } // for test_it
+
+ if (!generated_instantiations) {
+ // There are no generaotrs, or they all generate nothing ...
+ InsertSyntheticTestCase(GetTestSuiteName(), code_location_,
+ !tests_.empty());
+ }
+ } // RegisterTests
+
+ private:
+ // LocalTestInfo structure keeps information about a single test registered
+ // with TEST_P macro.
+ struct TestInfo {
+ TestInfo(const char* a_test_suite_base_name, const char* a_test_base_name,
+ TestMetaFactoryBase<ParamType>* a_test_meta_factory,
+ CodeLocation a_code_location)
+ : test_suite_base_name(a_test_suite_base_name),
+ test_base_name(a_test_base_name),
+ test_meta_factory(a_test_meta_factory),
+ code_location(a_code_location) {}
+
+ const std::string test_suite_base_name;
+ const std::string test_base_name;
+ const std::unique_ptr<TestMetaFactoryBase<ParamType>> test_meta_factory;
+ const CodeLocation code_location;
+ };
+ using TestInfoContainer = ::std::vector<std::shared_ptr<TestInfo>>;
+ // Records data received from INSTANTIATE_TEST_SUITE_P macros:
+ // <Instantiation name, Sequence generator creation function,
+ // Name generator function, Source file, Source line>
+ struct InstantiationInfo {
+ InstantiationInfo(const std::string& name_in,
+ GeneratorCreationFunc* generator_in,
+ ParamNameGeneratorFunc* name_func_in, const char* file_in,
+ int line_in)
+ : name(name_in),
+ generator(generator_in),
+ name_func(name_func_in),
+ file(file_in),
+ line(line_in) {}
+
+ std::string name;
+ GeneratorCreationFunc* generator;
+ ParamNameGeneratorFunc* name_func;
+ const char* file;
+ int line;
+ };
+ typedef ::std::vector<InstantiationInfo> InstantiationContainer;
+
+ static bool IsValidParamName(const std::string& name) {
+ // Check for empty string
+ if (name.empty()) return false;
+
+ // Check for invalid characters
+ for (std::string::size_type index = 0; index < name.size(); ++index) {
+ if (!IsAlNum(name[index]) && name[index] != '_') return false;
+ }
+
+ return true;
+ }
+
+ const std::string test_suite_name_;
+ CodeLocation code_location_;
+ TestInfoContainer tests_;
+ InstantiationContainer instantiations_;
+
+ ParameterizedTestSuiteInfo(const ParameterizedTestSuiteInfo&) = delete;
+ ParameterizedTestSuiteInfo& operator=(const ParameterizedTestSuiteInfo&) =
+ delete;
+}; // class ParameterizedTestSuiteInfo
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+template <class TestCase>
+using ParameterizedTestCaseInfo = ParameterizedTestSuiteInfo<TestCase>;
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestSuiteRegistry contains a map of
+// ParameterizedTestSuiteInfoBase classes accessed by test suite names. TEST_P
+// and INSTANTIATE_TEST_SUITE_P macros use it to locate their corresponding
+// ParameterizedTestSuiteInfo descriptors.
+class ParameterizedTestSuiteRegistry {
+ public:
+ ParameterizedTestSuiteRegistry() {}
+ ~ParameterizedTestSuiteRegistry() {
+ for (auto& test_suite_info : test_suite_infos_) {
+ delete test_suite_info;
+ }
+ }
+
+ // Looks up or creates and returns a structure containing information about
+ // tests and instantiations of a particular test suite.
+ template <class TestSuite>
+ ParameterizedTestSuiteInfo<TestSuite>* GetTestSuitePatternHolder(
+ const char* test_suite_name, CodeLocation code_location) {
+ ParameterizedTestSuiteInfo<TestSuite>* typed_test_info = nullptr;
+ for (auto& test_suite_info : test_suite_infos_) {
+ if (test_suite_info->GetTestSuiteName() == test_suite_name) {
+ if (test_suite_info->GetTestSuiteTypeId() != GetTypeId<TestSuite>()) {
+ // Complain about incorrect usage of Google Test facilities
+ // and terminate the program since we cannot guaranty correct
+ // test suite setup and tear-down in this case.
+ ReportInvalidTestSuiteType(test_suite_name, code_location);
+ posix::Abort();
+ } else {
+ // At this point we are sure that the object we found is of the same
+ // type we are looking for, so we downcast it to that type
+ // without further checks.
+ typed_test_info = CheckedDowncastToActualType<
+ ParameterizedTestSuiteInfo<TestSuite>>(test_suite_info);
+ }
+ break;
+ }
+ }
+ if (typed_test_info == nullptr) {
+ typed_test_info = new ParameterizedTestSuiteInfo<TestSuite>(
+ test_suite_name, code_location);
+ test_suite_infos_.push_back(typed_test_info);
+ }
+ return typed_test_info;
+ }
+ void RegisterTests() {
+ for (auto& test_suite_info : test_suite_infos_) {
+ test_suite_info->RegisterTests();
+ }
+ }
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ template <class TestCase>
+ ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
+ const char* test_case_name, CodeLocation code_location) {
+ return GetTestSuitePatternHolder<TestCase>(test_case_name, code_location);
+ }
+
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ private:
+ using TestSuiteInfoContainer = ::std::vector<ParameterizedTestSuiteInfoBase*>;
+
+ TestSuiteInfoContainer test_suite_infos_;
+
+ ParameterizedTestSuiteRegistry(const ParameterizedTestSuiteRegistry&) =
+ delete;
+ ParameterizedTestSuiteRegistry& operator=(
+ const ParameterizedTestSuiteRegistry&) = delete;
+};
+
+// Keep track of what type-parameterized test suite are defined and
+// where as well as which are intatiated. This allows susequently
+// identifying suits that are defined but never used.
+class TypeParameterizedTestSuiteRegistry {
+ public:
+ // Add a suite definition
+ void RegisterTestSuite(const char* test_suite_name,
+ CodeLocation code_location);
+
+ // Add an instantiation of a suit.
+ void RegisterInstantiation(const char* test_suite_name);
+
+ // For each suit repored as defined but not reported as instantiation,
+ // emit a test that reports that fact (configurably, as an error).
+ void CheckForInstantiations();
+
+ private:
+ struct TypeParameterizedTestSuiteInfo {
+ explicit TypeParameterizedTestSuiteInfo(CodeLocation c)
+ : code_location(c), instantiated(false) {}
+
+ CodeLocation code_location;
+ bool instantiated;
+ };
+
+ std::map<std::string, TypeParameterizedTestSuiteInfo> suites_;
+};
+
+} // namespace internal
+
+// Forward declarations of ValuesIn(), which is implemented in
+// include/gtest/gtest-param-test.h.
+template <class Container>
+internal::ParamGenerator<typename Container::value_type> ValuesIn(
+ const Container& container);
+
+namespace internal {
+// Used in the Values() function to provide polymorphic capabilities.
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4100)
+#endif
+
+template <typename... Ts>
+class ValueArray {
+ public:
+ explicit ValueArray(Ts... v) : v_(FlatTupleConstructTag{}, std::move(v)...) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const { // NOLINT
+ return ValuesIn(MakeVector<T>(MakeIndexSequence<sizeof...(Ts)>()));
+ }
+
+ private:
+ template <typename T, size_t... I>
+ std::vector<T> MakeVector(IndexSequence<I...>) const {
+ return std::vector<T>{static_cast<T>(v_.template Get<I>())...};
+ }
+
+ FlatTuple<Ts...> v_;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+template <typename... T>
+class CartesianProductGenerator
+ : public ParamGeneratorInterface<::std::tuple<T...>> {
+ public:
+ typedef ::std::tuple<T...> ParamType;
+
+ CartesianProductGenerator(const std::tuple<ParamGenerator<T>...>& g)
+ : generators_(g) {}
+ ~CartesianProductGenerator() override {}
+
+ ParamIteratorInterface<ParamType>* Begin() const override {
+ return new Iterator(this, generators_, false);
+ }
+ ParamIteratorInterface<ParamType>* End() const override {
+ return new Iterator(this, generators_, true);
+ }
+
+ private:
+ template <class I>
+ class IteratorImpl;
+ template <size_t... I>
+ class IteratorImpl<IndexSequence<I...>>
+ : public ParamIteratorInterface<ParamType> {
+ public:
+ IteratorImpl(const ParamGeneratorInterface<ParamType>* base,
+ const std::tuple<ParamGenerator<T>...>& generators,
+ bool is_end)
+ : base_(base),
+ begin_(std::get<I>(generators).begin()...),
+ end_(std::get<I>(generators).end()...),
+ current_(is_end ? end_ : begin_) {
+ ComputeCurrentValue();
+ }
+ ~IteratorImpl() override {}
+
+ const ParamGeneratorInterface<ParamType>* BaseGenerator() const override {
+ return base_;
+ }
+ // Advance should not be called on beyond-of-range iterators
+ // so no component iterators must be beyond end of range, either.
+ void Advance() override {
+ assert(!AtEnd());
+ // Advance the last iterator.
+ ++std::get<sizeof...(T) - 1>(current_);
+ // if that reaches end, propagate that up.
+ AdvanceIfEnd<sizeof...(T) - 1>();
+ ComputeCurrentValue();
+ }
+ ParamIteratorInterface<ParamType>* Clone() const override {
+ return new IteratorImpl(*this);
+ }
+
+ const ParamType* Current() const override { return current_value_.get(); }
+
+ bool Equals(const ParamIteratorInterface<ParamType>& other) const override {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const IteratorImpl* typed_other =
+ CheckedDowncastToActualType<const IteratorImpl>(&other);
+
+ // We must report iterators equal if they both point beyond their
+ // respective ranges. That can happen in a variety of fashions,
+ // so we have to consult AtEnd().
+ if (AtEnd() && typed_other->AtEnd()) return true;
+
+ bool same = true;
+ bool dummy[] = {
+ (same = same && std::get<I>(current_) ==
+ std::get<I>(typed_other->current_))...};
+ (void)dummy;
+ return same;
+ }
+
+ private:
+ template <size_t ThisI>
+ void AdvanceIfEnd() {
+ if (std::get<ThisI>(current_) != std::get<ThisI>(end_)) return;
+
+ bool last = ThisI == 0;
+ if (last) {
+ // We are done. Nothing else to propagate.
+ return;
+ }
+
+ constexpr size_t NextI = ThisI - (ThisI != 0);
+ std::get<ThisI>(current_) = std::get<ThisI>(begin_);
+ ++std::get<NextI>(current_);
+ AdvanceIfEnd<NextI>();
+ }
+
+ void ComputeCurrentValue() {
+ if (!AtEnd())
+ current_value_ = std::make_shared<ParamType>(*std::get<I>(current_)...);
+ }
+ bool AtEnd() const {
+ bool at_end = false;
+ bool dummy[] = {
+ (at_end = at_end || std::get<I>(current_) == std::get<I>(end_))...};
+ (void)dummy;
+ return at_end;
+ }
+
+ const ParamGeneratorInterface<ParamType>* const base_;
+ std::tuple<typename ParamGenerator<T>::iterator...> begin_;
+ std::tuple<typename ParamGenerator<T>::iterator...> end_;
+ std::tuple<typename ParamGenerator<T>::iterator...> current_;
+ std::shared_ptr<ParamType> current_value_;
+ };
+
+ using Iterator = IteratorImpl<typename MakeIndexSequence<sizeof...(T)>::type>;
+
+ std::tuple<ParamGenerator<T>...> generators_;
+};
+
+template <class... Gen>
+class CartesianProductHolder {
+ public:
+ CartesianProductHolder(const Gen&... g) : generators_(g...) {}
+ template <typename... T>
+ operator ParamGenerator<::std::tuple<T...>>() const {
+ return ParamGenerator<::std::tuple<T...>>(
+ new CartesianProductGenerator<T...>(generators_));
+ }
+
+ private:
+ std::tuple<Gen...> generators_;
+};
+
+} // namespace internal
+} // namespace testing
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port-arch.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port-arch.h
new file mode 100644
index 0000000000..f025db76ad
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port-arch.h
@@ -0,0 +1,116 @@
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines the GTEST_OS_* macro.
+// It is separate from gtest-port.h so that custom/gtest-port.h can include it.
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
+
+// Determines the platform on which Google Test is compiled.
+#ifdef __CYGWIN__
+#define GTEST_OS_CYGWIN 1
+#elif defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)
+#define GTEST_OS_WINDOWS_MINGW 1
+#define GTEST_OS_WINDOWS 1
+#elif defined _WIN32
+#define GTEST_OS_WINDOWS 1
+#ifdef _WIN32_WCE
+#define GTEST_OS_WINDOWS_MOBILE 1
+#elif defined(WINAPI_FAMILY)
+#include <winapifamily.h>
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#define GTEST_OS_WINDOWS_DESKTOP 1
+#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
+#define GTEST_OS_WINDOWS_PHONE 1
+#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
+#define GTEST_OS_WINDOWS_RT 1
+#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_TV_TITLE)
+#define GTEST_OS_WINDOWS_PHONE 1
+#define GTEST_OS_WINDOWS_TV_TITLE 1
+#else
+// WINAPI_FAMILY defined but no known partition matched.
+// Default to desktop.
+#define GTEST_OS_WINDOWS_DESKTOP 1
+#endif
+#else
+#define GTEST_OS_WINDOWS_DESKTOP 1
+#endif // _WIN32_WCE
+#elif defined __OS2__
+#define GTEST_OS_OS2 1
+#elif defined __APPLE__
+#define GTEST_OS_MAC 1
+#include <TargetConditionals.h>
+#if TARGET_OS_IPHONE
+#define GTEST_OS_IOS 1
+#endif
+#elif defined __DragonFly__
+#define GTEST_OS_DRAGONFLY 1
+#elif defined __FreeBSD__
+#define GTEST_OS_FREEBSD 1
+#elif defined __Fuchsia__
+#define GTEST_OS_FUCHSIA 1
+#elif defined(__GNU__)
+#define GTEST_OS_GNU_HURD 1
+#elif defined(__GLIBC__) && defined(__FreeBSD_kernel__)
+#define GTEST_OS_GNU_KFREEBSD 1
+#elif defined __linux__
+#define GTEST_OS_LINUX 1
+#if defined __ANDROID__
+#define GTEST_OS_LINUX_ANDROID 1
+#endif
+#elif defined __MVS__
+#define GTEST_OS_ZOS 1
+#elif defined(__sun) && defined(__SVR4)
+#define GTEST_OS_SOLARIS 1
+#elif defined(_AIX)
+#define GTEST_OS_AIX 1
+#elif defined(__hpux)
+#define GTEST_OS_HPUX 1
+#elif defined __native_client__
+#define GTEST_OS_NACL 1
+#elif defined __NetBSD__
+#define GTEST_OS_NETBSD 1
+#elif defined __OpenBSD__
+#define GTEST_OS_OPENBSD 1
+#elif defined __QNX__
+#define GTEST_OS_QNX 1
+#elif defined(__HAIKU__)
+#define GTEST_OS_HAIKU 1
+#elif defined ESP8266
+#define GTEST_OS_ESP8266 1
+#elif defined ESP32
+#define GTEST_OS_ESP32 1
+#elif defined(__XTENSA__)
+#define GTEST_OS_XTENSA 1
+#endif // __CYGWIN__
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port.h
new file mode 100644
index 0000000000..0003d27658
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port.h
@@ -0,0 +1,2413 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Low-level types and utilities for porting Google Test to various
+// platforms. All macros ending with _ and symbols defined in an
+// internal namespace are subject to change without notice. Code
+// outside Google Test MUST NOT USE THEM DIRECTLY. Macros that don't
+// end with _ are part of Google Test's public API and can be used by
+// code outside Google Test.
+//
+// This file is fundamental to Google Test. All other Google Test source
+// files are expected to #include this. Therefore, it cannot #include
+// any other Google Test header.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+
+// Environment-describing macros
+// -----------------------------
+//
+// Google Test can be used in many different environments. Macros in
+// this section tell Google Test what kind of environment it is being
+// used in, such that Google Test can provide environment-specific
+// features and implementations.
+//
+// Google Test tries to automatically detect the properties of its
+// environment, so users usually don't need to worry about these
+// macros. However, the automatic detection is not perfect.
+// Sometimes it's necessary for a user to define some of the following
+// macros in the build script to override Google Test's decisions.
+//
+// If the user doesn't define a macro in the list, Google Test will
+// provide a default definition. After this header is #included, all
+// macros in this list will be defined to either 1 or 0.
+//
+// Notes to maintainers:
+// - Each macro here is a user-tweakable knob; do not grow the list
+// lightly.
+// - Use #if to key off these macros. Don't use #ifdef or "#if
+// defined(...)", which will not work as these macros are ALWAYS
+// defined.
+//
+// GTEST_HAS_CLONE - Define it to 1/0 to indicate that clone(2)
+// is/isn't available.
+// GTEST_HAS_EXCEPTIONS - Define it to 1/0 to indicate that exceptions
+// are enabled.
+// GTEST_HAS_POSIX_RE - Define it to 1/0 to indicate that POSIX regular
+// expressions are/aren't available.
+// GTEST_HAS_PTHREAD - Define it to 1/0 to indicate that <pthread.h>
+// is/isn't available.
+// GTEST_HAS_RTTI - Define it to 1/0 to indicate that RTTI is/isn't
+// enabled.
+// GTEST_HAS_STD_WSTRING - Define it to 1/0 to indicate that
+// std::wstring does/doesn't work (Google Test can
+// be used where std::wstring is unavailable).
+// GTEST_HAS_SEH - Define it to 1/0 to indicate whether the
+// compiler supports Microsoft's "Structured
+// Exception Handling".
+// GTEST_HAS_STREAM_REDIRECTION
+// - Define it to 1/0 to indicate whether the
+// platform supports I/O stream redirection using
+// dup() and dup2().
+// GTEST_LINKED_AS_SHARED_LIBRARY
+// - Define to 1 when compiling tests that use
+// Google Test as a shared library (known as
+// DLL on Windows).
+// GTEST_CREATE_SHARED_LIBRARY
+// - Define to 1 when compiling Google Test itself
+// as a shared library.
+// GTEST_DEFAULT_DEATH_TEST_STYLE
+// - The default value of --gtest_death_test_style.
+// The legacy default has been "fast" in the open
+// source version since 2008. The recommended value
+// is "threadsafe", and can be set in
+// custom/gtest-port.h.
+
+// Platform-indicating macros
+// --------------------------
+//
+// Macros indicating the platform on which Google Test is being used
+// (a macro is defined to 1 if compiled on the given platform;
+// otherwise UNDEFINED -- it's never defined to 0.). Google Test
+// defines these macros automatically. Code outside Google Test MUST
+// NOT define them.
+//
+// GTEST_OS_AIX - IBM AIX
+// GTEST_OS_CYGWIN - Cygwin
+// GTEST_OS_DRAGONFLY - DragonFlyBSD
+// GTEST_OS_FREEBSD - FreeBSD
+// GTEST_OS_FUCHSIA - Fuchsia
+// GTEST_OS_GNU_HURD - GNU/Hurd
+// GTEST_OS_GNU_KFREEBSD - GNU/kFreeBSD
+// GTEST_OS_HAIKU - Haiku
+// GTEST_OS_HPUX - HP-UX
+// GTEST_OS_LINUX - Linux
+// GTEST_OS_LINUX_ANDROID - Google Android
+// GTEST_OS_MAC - Mac OS X
+// GTEST_OS_IOS - iOS
+// GTEST_OS_NACL - Google Native Client (NaCl)
+// GTEST_OS_NETBSD - NetBSD
+// GTEST_OS_OPENBSD - OpenBSD
+// GTEST_OS_OS2 - OS/2
+// GTEST_OS_QNX - QNX
+// GTEST_OS_SOLARIS - Sun Solaris
+// GTEST_OS_WINDOWS - Windows (Desktop, MinGW, or Mobile)
+// GTEST_OS_WINDOWS_DESKTOP - Windows Desktop
+// GTEST_OS_WINDOWS_MINGW - MinGW
+// GTEST_OS_WINDOWS_MOBILE - Windows Mobile
+// GTEST_OS_WINDOWS_PHONE - Windows Phone
+// GTEST_OS_WINDOWS_RT - Windows Store App/WinRT
+// GTEST_OS_ZOS - z/OS
+//
+// Among the platforms, Cygwin, Linux, Mac OS X, and Windows have the
+// most stable support. Since core members of the Google Test project
+// don't have access to other platforms, support for them may be less
+// stable. If you notice any problems on your platform, please notify
+// googletestframework@googlegroups.com (patches for fixing them are
+// even more welcome!).
+//
+// It is possible that none of the GTEST_OS_* macros are defined.
+
+// Feature-indicating macros
+// -------------------------
+//
+// Macros indicating which Google Test features are available (a macro
+// is defined to 1 if the corresponding feature is supported;
+// otherwise UNDEFINED -- it's never defined to 0.). Google Test
+// defines these macros automatically. Code outside Google Test MUST
+// NOT define them.
+//
+// These macros are public so that portable tests can be written.
+// Such tests typically surround code using a feature with an #if
+// which controls that code. For example:
+//
+// #if GTEST_HAS_DEATH_TEST
+// EXPECT_DEATH(DoSomethingDeadly());
+// #endif
+//
+// GTEST_HAS_DEATH_TEST - death tests
+// GTEST_HAS_TYPED_TEST - typed tests
+// GTEST_HAS_TYPED_TEST_P - type-parameterized tests
+// GTEST_IS_THREADSAFE - Google Test is thread-safe.
+// GTEST_USES_RE2 - the RE2 regular expression library is used
+// GTEST_USES_POSIX_RE - enhanced POSIX regex is used. Do not confuse with
+// GTEST_HAS_POSIX_RE (see above) which users can
+// define themselves.
+// GTEST_USES_SIMPLE_RE - our own simple regex is used;
+// the above RE\b(s) are mutually exclusive.
+
+// Misc public macros
+// ------------------
+//
+// GTEST_FLAG(flag_name) - references the variable corresponding to
+// the given Google Test flag.
+
+// Internal utilities
+// ------------------
+//
+// The following macros and utilities are for Google Test's INTERNAL
+// use only. Code outside Google Test MUST NOT USE THEM DIRECTLY.
+//
+// Macros for basic C++ coding:
+// GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
+// GTEST_ATTRIBUTE_UNUSED_ - declares that a class' instances or a
+// variable don't have to be used.
+// GTEST_MUST_USE_RESULT_ - declares that a function's result must be used.
+// GTEST_INTENTIONAL_CONST_COND_PUSH_ - start code section where MSVC C4127 is
+// suppressed (constant conditional).
+// GTEST_INTENTIONAL_CONST_COND_POP_ - finish code section where MSVC C4127
+// is suppressed.
+// GTEST_INTERNAL_HAS_ANY - for enabling UniversalPrinter<std::any> or
+// UniversalPrinter<absl::any> specializations.
+// GTEST_INTERNAL_HAS_OPTIONAL - for enabling UniversalPrinter<std::optional>
+// or
+// UniversalPrinter<absl::optional>
+// specializations.
+// GTEST_INTERNAL_HAS_STRING_VIEW - for enabling Matcher<std::string_view> or
+// Matcher<absl::string_view>
+// specializations.
+// GTEST_INTERNAL_HAS_VARIANT - for enabling UniversalPrinter<std::variant> or
+// UniversalPrinter<absl::variant>
+// specializations.
+//
+// Synchronization:
+// Mutex, MutexLock, ThreadLocal, GetThreadCount()
+// - synchronization primitives.
+//
+// Regular expressions:
+// RE - a simple regular expression class using
+// 1) the RE2 syntax on all platforms when built with RE2
+// and Abseil as dependencies
+// 2) the POSIX Extended Regular Expression syntax on
+// UNIX-like platforms,
+// 3) A reduced regular exception syntax on other platforms,
+// including Windows.
+// Logging:
+// GTEST_LOG_() - logs messages at the specified severity level.
+// LogToStderr() - directs all log messages to stderr.
+// FlushInfoLog() - flushes informational log messages.
+//
+// Stdout and stderr capturing:
+// CaptureStdout() - starts capturing stdout.
+// GetCapturedStdout() - stops capturing stdout and returns the captured
+// string.
+// CaptureStderr() - starts capturing stderr.
+// GetCapturedStderr() - stops capturing stderr and returns the captured
+// string.
+//
+// Integer types:
+// TypeWithSize - maps an integer to a int type.
+// TimeInMillis - integers of known sizes.
+// BiggestInt - the biggest signed integer type.
+//
+// Command-line utilities:
+// GetInjectableArgvs() - returns the command line as a vector of strings.
+//
+// Environment variable utilities:
+// GetEnv() - gets the value of an environment variable.
+// BoolFromGTestEnv() - parses a bool environment variable.
+// Int32FromGTestEnv() - parses an int32_t environment variable.
+// StringFromGTestEnv() - parses a string environment variable.
+//
+// Deprecation warnings:
+// GTEST_INTERNAL_DEPRECATED(message) - attribute marking a function as
+// deprecated; calling a marked function
+// should generate a compiler warning
+
+#include <ctype.h> // for isspace, etc
+#include <stddef.h> // for ptrdiff_t
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cerrno>
+// #include <condition_variable> // Guarded by GTEST_IS_THREADSAFE below
+#include <cstdint>
+#include <iostream>
+#include <limits>
+#include <locale>
+#include <memory>
+#include <string>
+// #include <mutex> // Guarded by GTEST_IS_THREADSAFE below
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+#ifndef _WIN32_WCE
+#include <sys/stat.h>
+#include <sys/types.h>
+#endif // !_WIN32_WCE
+
+#if defined __APPLE__
+#include <AvailabilityMacros.h>
+#include <TargetConditionals.h>
+#endif
+
+#include "gtest/internal/custom/gtest-port.h"
+#include "gtest/internal/gtest-port-arch.h"
+
+#if GTEST_HAS_ABSL
+#include "absl/flags/declare.h"
+#include "absl/flags/flag.h"
+#include "absl/flags/reflection.h"
+#endif
+
+#if !defined(GTEST_DEV_EMAIL_)
+#define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com"
+#define GTEST_FLAG_PREFIX_ "gtest_"
+#define GTEST_FLAG_PREFIX_DASH_ "gtest-"
+#define GTEST_FLAG_PREFIX_UPPER_ "GTEST_"
+#define GTEST_NAME_ "Google Test"
+#define GTEST_PROJECT_URL_ "https://github.com/google/googletest/"
+#endif // !defined(GTEST_DEV_EMAIL_)
+
+#if !defined(GTEST_INIT_GOOGLE_TEST_NAME_)
+#define GTEST_INIT_GOOGLE_TEST_NAME_ "testing::InitGoogleTest"
+#endif // !defined(GTEST_INIT_GOOGLE_TEST_NAME_)
+
+// Determines the version of gcc that is used to compile this.
+#ifdef __GNUC__
+// 40302 means version 4.3.2.
+#define GTEST_GCC_VER_ \
+ (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#endif // __GNUC__
+
+// Macros for disabling Microsoft Visual C++ warnings.
+//
+// GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 4385)
+// /* code that triggers warnings C4800 and C4385 */
+// GTEST_DISABLE_MSC_WARNINGS_POP_()
+#if defined(_MSC_VER)
+#define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings) \
+ __pragma(warning(push)) __pragma(warning(disable : warnings))
+#define GTEST_DISABLE_MSC_WARNINGS_POP_() __pragma(warning(pop))
+#else
+// Not all compilers are MSVC
+#define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings)
+#define GTEST_DISABLE_MSC_WARNINGS_POP_()
+#endif
+
+// Clang on Windows does not understand MSVC's pragma warning.
+// We need clang-specific way to disable function deprecation warning.
+#ifdef __clang__
+#define GTEST_DISABLE_MSC_DEPRECATED_PUSH_() \
+ _Pragma("clang diagnostic push") \
+ _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") \
+ _Pragma("clang diagnostic ignored \"-Wdeprecated-implementations\"")
+#define GTEST_DISABLE_MSC_DEPRECATED_POP_() _Pragma("clang diagnostic pop")
+#else
+#define GTEST_DISABLE_MSC_DEPRECATED_PUSH_() \
+ GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996)
+#define GTEST_DISABLE_MSC_DEPRECATED_POP_() GTEST_DISABLE_MSC_WARNINGS_POP_()
+#endif
+
+// Brings in definitions for functions used in the testing::internal::posix
+// namespace (read, write, close, chdir, isatty, stat). We do not currently
+// use them on Windows Mobile.
+#if GTEST_OS_WINDOWS
+#if !GTEST_OS_WINDOWS_MOBILE
+#include <direct.h>
+#include <io.h>
+#endif
+// In order to avoid having to include <windows.h>, use forward declaration
+#if GTEST_OS_WINDOWS_MINGW && !defined(__MINGW64_VERSION_MAJOR)
+// MinGW defined _CRITICAL_SECTION and _RTL_CRITICAL_SECTION as two
+// separate (equivalent) structs, instead of using typedef
+typedef struct _CRITICAL_SECTION GTEST_CRITICAL_SECTION;
+#else
+// Assume CRITICAL_SECTION is a typedef of _RTL_CRITICAL_SECTION.
+// This assumption is verified by
+// WindowsTypesTest.CRITICAL_SECTIONIs_RTL_CRITICAL_SECTION.
+typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
+#endif
+#elif GTEST_OS_XTENSA
+#include <unistd.h>
+// Xtensa toolchains define strcasecmp in the string.h header instead of
+// strings.h. string.h is already included.
+#else
+// This assumes that non-Windows OSes provide unistd.h. For OSes where this
+// is not the case, we need to include headers that provide the functions
+// mentioned above.
+#include <strings.h>
+#include <unistd.h>
+#endif // GTEST_OS_WINDOWS
+
+#if GTEST_OS_LINUX_ANDROID
+// Used to define __ANDROID_API__ matching the target NDK API level.
+#include <android/api-level.h> // NOLINT
+#endif
+
+// Defines this to true if and only if Google Test can use POSIX regular
+// expressions.
+#ifndef GTEST_HAS_POSIX_RE
+#if GTEST_OS_LINUX_ANDROID
+// On Android, <regex.h> is only available starting with Gingerbread.
+#define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9)
+#else
+#define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS && !GTEST_OS_XTENSA)
+#endif
+#endif
+
+// Select the regular expression implementation.
+#if GTEST_HAS_ABSL
+// When using Abseil, RE2 is required.
+#include "absl/strings/string_view.h"
+#include "re2/re2.h"
+#define GTEST_USES_RE2 1
+#elif GTEST_HAS_POSIX_RE
+#include <regex.h> // NOLINT
+#define GTEST_USES_POSIX_RE 1
+#else
+// Use our own simple regex implementation.
+#define GTEST_USES_SIMPLE_RE 1
+#endif
+
+#ifndef GTEST_HAS_EXCEPTIONS
+// The user didn't tell us whether exceptions are enabled, so we need
+// to figure it out.
+#if defined(_MSC_VER) && defined(_CPPUNWIND)
+// MSVC defines _CPPUNWIND to 1 if and only if exceptions are enabled.
+#define GTEST_HAS_EXCEPTIONS 1
+#elif defined(__BORLANDC__)
+// C++Builder's implementation of the STL uses the _HAS_EXCEPTIONS
+// macro to enable exceptions, so we'll do the same.
+// Assumes that exceptions are enabled by default.
+#ifndef _HAS_EXCEPTIONS
+#define _HAS_EXCEPTIONS 1
+#endif // _HAS_EXCEPTIONS
+#define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS
+#elif defined(__clang__)
+// clang defines __EXCEPTIONS if and only if exceptions are enabled before clang
+// 220714, but if and only if cleanups are enabled after that. In Obj-C++ files,
+// there can be cleanups for ObjC exceptions which also need cleanups, even if
+// C++ exceptions are disabled. clang has __has_feature(cxx_exceptions) which
+// checks for C++ exceptions starting at clang r206352, but which checked for
+// cleanups prior to that. To reliably check for C++ exception availability with
+// clang, check for
+// __EXCEPTIONS && __has_feature(cxx_exceptions).
+#define GTEST_HAS_EXCEPTIONS (__EXCEPTIONS && __has_feature(cxx_exceptions))
+#elif defined(__GNUC__) && __EXCEPTIONS
+// gcc defines __EXCEPTIONS to 1 if and only if exceptions are enabled.
+#define GTEST_HAS_EXCEPTIONS 1
+#elif defined(__SUNPRO_CC)
+// Sun Pro CC supports exceptions. However, there is no compile-time way of
+// detecting whether they are enabled or not. Therefore, we assume that
+// they are enabled unless the user tells us otherwise.
+#define GTEST_HAS_EXCEPTIONS 1
+#elif defined(__IBMCPP__) && __EXCEPTIONS
+// xlC defines __EXCEPTIONS to 1 if and only if exceptions are enabled.
+#define GTEST_HAS_EXCEPTIONS 1
+#elif defined(__HP_aCC)
+// Exception handling is in effect by default in HP aCC compiler. It has to
+// be turned of by +noeh compiler option if desired.
+#define GTEST_HAS_EXCEPTIONS 1
+#else
+// For other compilers, we assume exceptions are disabled to be
+// conservative.
+#define GTEST_HAS_EXCEPTIONS 0
+#endif // defined(_MSC_VER) || defined(__BORLANDC__)
+#endif // GTEST_HAS_EXCEPTIONS
+
+#ifndef GTEST_HAS_STD_WSTRING
+// The user didn't tell us whether ::std::wstring is available, so we need
+// to figure it out.
+// Cygwin 1.7 and below doesn't support ::std::wstring.
+// Solaris' libc++ doesn't support it either. Android has
+// no support for it at least as recent as Froyo (2.2).
+#define GTEST_HAS_STD_WSTRING \
+ (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
+ GTEST_OS_HAIKU || GTEST_OS_ESP32 || GTEST_OS_ESP8266 || GTEST_OS_XTENSA))
+
+#endif // GTEST_HAS_STD_WSTRING
+
+// Determines whether RTTI is available.
+#ifndef GTEST_HAS_RTTI
+// The user didn't tell us whether RTTI is enabled, so we need to
+// figure it out.
+
+#ifdef _MSC_VER
+
+#ifdef _CPPRTTI // MSVC defines this macro if and only if RTTI is enabled.
+#define GTEST_HAS_RTTI 1
+#else
+#define GTEST_HAS_RTTI 0
+#endif
+
+// Starting with version 4.3.2, gcc defines __GXX_RTTI if and only if RTTI is
+// enabled.
+#elif defined(__GNUC__)
+
+#ifdef __GXX_RTTI
+// When building against STLport with the Android NDK and with
+// -frtti -fno-exceptions, the build fails at link time with undefined
+// references to __cxa_bad_typeid. Note sure if STL or toolchain bug,
+// so disable RTTI when detected.
+#if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) && !defined(__EXCEPTIONS)
+#define GTEST_HAS_RTTI 0
+#else
+#define GTEST_HAS_RTTI 1
+#endif // GTEST_OS_LINUX_ANDROID && __STLPORT_MAJOR && !__EXCEPTIONS
+#else
+#define GTEST_HAS_RTTI 0
+#endif // __GXX_RTTI
+
+// Clang defines __GXX_RTTI starting with version 3.0, but its manual recommends
+// using has_feature instead. has_feature(cxx_rtti) is supported since 2.7, the
+// first version with C++ support.
+#elif defined(__clang__)
+
+#define GTEST_HAS_RTTI __has_feature(cxx_rtti)
+
+// Starting with version 9.0 IBM Visual Age defines __RTTI_ALL__ to 1 if
+// both the typeid and dynamic_cast features are present.
+#elif defined(__IBMCPP__) && (__IBMCPP__ >= 900)
+
+#ifdef __RTTI_ALL__
+#define GTEST_HAS_RTTI 1
+#else
+#define GTEST_HAS_RTTI 0
+#endif
+
+#else
+
+// For all other compilers, we assume RTTI is enabled.
+#define GTEST_HAS_RTTI 1
+
+#endif // _MSC_VER
+
+#endif // GTEST_HAS_RTTI
+
+// It's this header's responsibility to #include <typeinfo> when RTTI
+// is enabled.
+#if GTEST_HAS_RTTI
+#include <typeinfo>
+#endif
+
+// Determines whether Google Test can use the pthreads library.
+#ifndef GTEST_HAS_PTHREAD
+// The user didn't tell us explicitly, so we make reasonable assumptions about
+// which platforms have pthreads support.
+//
+// To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0
+// to your compiler flags.
+#define GTEST_HAS_PTHREAD \
+ (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX || GTEST_OS_QNX || \
+ GTEST_OS_FREEBSD || GTEST_OS_NACL || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA || \
+ GTEST_OS_DRAGONFLY || GTEST_OS_GNU_KFREEBSD || GTEST_OS_OPENBSD || \
+ GTEST_OS_HAIKU || GTEST_OS_GNU_HURD)
+#endif // GTEST_HAS_PTHREAD
+
+#if GTEST_HAS_PTHREAD
+// gtest-port.h guarantees to #include <pthread.h> when GTEST_HAS_PTHREAD is
+// true.
+#include <pthread.h> // NOLINT
+
+// For timespec and nanosleep, used below.
+#include <time.h> // NOLINT
+#endif
+
+// Determines whether clone(2) is supported.
+// Usually it will only be available on Linux, excluding
+// Linux on the Itanium architecture.
+// Also see http://linux.die.net/man/2/clone.
+#ifndef GTEST_HAS_CLONE
+// The user didn't tell us, so we need to figure it out.
+
+#if GTEST_OS_LINUX && !defined(__ia64__)
+#if GTEST_OS_LINUX_ANDROID
+// On Android, clone() became available at different API levels for each 32-bit
+// architecture.
+#if defined(__LP64__) || (defined(__arm__) && __ANDROID_API__ >= 9) || \
+ (defined(__mips__) && __ANDROID_API__ >= 12) || \
+ (defined(__i386__) && __ANDROID_API__ >= 17)
+#define GTEST_HAS_CLONE 1
+#else
+#define GTEST_HAS_CLONE 0
+#endif
+#else
+#define GTEST_HAS_CLONE 1
+#endif
+#else
+#define GTEST_HAS_CLONE 0
+#endif // GTEST_OS_LINUX && !defined(__ia64__)
+
+#endif // GTEST_HAS_CLONE
+
+// Determines whether to support stream redirection. This is used to test
+// output correctness and to implement death tests.
+#ifndef GTEST_HAS_STREAM_REDIRECTION
+// By default, we assume that stream redirection is supported on all
+// platforms except known mobile ones.
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
+ GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_XTENSA
+#define GTEST_HAS_STREAM_REDIRECTION 0
+#else
+#define GTEST_HAS_STREAM_REDIRECTION 1
+#endif // !GTEST_OS_WINDOWS_MOBILE
+#endif // GTEST_HAS_STREAM_REDIRECTION
+
+// Determines whether to support death tests.
+// pops up a dialog window that cannot be suppressed programmatically.
+#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
+ (GTEST_OS_MAC && !GTEST_OS_IOS) || \
+ (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER) || GTEST_OS_WINDOWS_MINGW || \
+ GTEST_OS_AIX || GTEST_OS_HPUX || GTEST_OS_OPENBSD || GTEST_OS_QNX || \
+ GTEST_OS_FREEBSD || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA || \
+ GTEST_OS_DRAGONFLY || GTEST_OS_GNU_KFREEBSD || GTEST_OS_HAIKU || \
+ GTEST_OS_GNU_HURD)
+#define GTEST_HAS_DEATH_TEST 1
+#endif
+
+// Determines whether to support type-driven tests.
+
+// Typed tests need <typeinfo> and variadic macros, which GCC, VC++ 8.0,
+// Sun Pro CC, IBM Visual Age, and HP aCC support.
+#if defined(__GNUC__) || defined(_MSC_VER) || defined(__SUNPRO_CC) || \
+ defined(__IBMCPP__) || defined(__HP_aCC)
+#define GTEST_HAS_TYPED_TEST 1
+#define GTEST_HAS_TYPED_TEST_P 1
+#endif
+
+// Determines whether the system compiler uses UTF-16 for encoding wide strings.
+#define GTEST_WIDE_STRING_USES_UTF16_ \
+ (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_AIX || GTEST_OS_OS2)
+
+// Determines whether test results can be streamed to a socket.
+#if GTEST_OS_LINUX || GTEST_OS_GNU_KFREEBSD || GTEST_OS_DRAGONFLY || \
+ GTEST_OS_FREEBSD || GTEST_OS_NETBSD || GTEST_OS_OPENBSD || \
+ GTEST_OS_GNU_HURD
+#define GTEST_CAN_STREAM_RESULTS_ 1
+#endif
+
+// Defines some utility macros.
+
+// The GNU compiler emits a warning if nested "if" statements are followed by
+// an "else" statement and braces are not used to explicitly disambiguate the
+// "else" binding. This leads to problems with code like:
+//
+// if (gate)
+// ASSERT_*(condition) << "Some message";
+//
+// The "switch (0) case 0:" idiom is used to suppress this.
+#ifdef __INTEL_COMPILER
+#define GTEST_AMBIGUOUS_ELSE_BLOCKER_
+#else
+#define GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ switch (0) \
+ case 0: \
+ default: // NOLINT
+#endif
+
+// Use this annotation at the end of a struct/class definition to
+// prevent the compiler from optimizing away instances that are never
+// used. This is useful when all interesting logic happens inside the
+// c'tor and / or d'tor. Example:
+//
+// struct Foo {
+// Foo() { ... }
+// } GTEST_ATTRIBUTE_UNUSED_;
+//
+// Also use it after a variable or parameter declaration to tell the
+// compiler the variable/parameter does not have to be used.
+#if defined(__GNUC__) && !defined(COMPILER_ICC)
+#define GTEST_ATTRIBUTE_UNUSED_ __attribute__((unused))
+#elif defined(__clang__)
+#if __has_attribute(unused)
+#define GTEST_ATTRIBUTE_UNUSED_ __attribute__((unused))
+#endif
+#endif
+#ifndef GTEST_ATTRIBUTE_UNUSED_
+#define GTEST_ATTRIBUTE_UNUSED_
+#endif
+
+// Use this annotation before a function that takes a printf format string.
+#if (defined(__GNUC__) || defined(__clang__)) && !defined(COMPILER_ICC)
+#if defined(__MINGW_PRINTF_FORMAT)
+// MinGW has two different printf implementations. Ensure the format macro
+// matches the selected implementation. See
+// https://sourceforge.net/p/mingw-w64/wiki2/gnu%20printf/.
+#define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \
+ __attribute__(( \
+ __format__(__MINGW_PRINTF_FORMAT, string_index, first_to_check)))
+#else
+#define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \
+ __attribute__((__format__(__printf__, string_index, first_to_check)))
+#endif
+#else
+#define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check)
+#endif
+
+// Tell the compiler to warn about unused return values for functions declared
+// with this macro. The macro should be used on function declarations
+// following the argument list:
+//
+// Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_;
+#if defined(__GNUC__) && !defined(COMPILER_ICC)
+#define GTEST_MUST_USE_RESULT_ __attribute__((warn_unused_result))
+#else
+#define GTEST_MUST_USE_RESULT_
+#endif // __GNUC__ && !COMPILER_ICC
+
+// MS C++ compiler emits warning when a conditional expression is compile time
+// constant. In some contexts this warning is false positive and needs to be
+// suppressed. Use the following two macros in such cases:
+//
+// GTEST_INTENTIONAL_CONST_COND_PUSH_()
+// while (true) {
+// GTEST_INTENTIONAL_CONST_COND_POP_()
+// }
+#define GTEST_INTENTIONAL_CONST_COND_PUSH_() \
+ GTEST_DISABLE_MSC_WARNINGS_PUSH_(4127)
+#define GTEST_INTENTIONAL_CONST_COND_POP_() GTEST_DISABLE_MSC_WARNINGS_POP_()
+
+// Determine whether the compiler supports Microsoft's Structured Exception
+// Handling. This is supported by several Windows compilers but generally
+// does not exist on any other system.
+#ifndef GTEST_HAS_SEH
+// The user didn't tell us, so we need to figure it out.
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+// These two compilers are known to support SEH.
+#define GTEST_HAS_SEH 1
+#else
+// Assume no SEH.
+#define GTEST_HAS_SEH 0
+#endif
+
+#endif // GTEST_HAS_SEH
+
+#ifndef GTEST_IS_THREADSAFE
+
+#define GTEST_IS_THREADSAFE \
+ (GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ || \
+ (GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT) || \
+ GTEST_HAS_PTHREAD)
+
+#endif // GTEST_IS_THREADSAFE
+
+#if GTEST_IS_THREADSAFE
+// Some platforms don't support including these threading related headers.
+#include <condition_variable> // NOLINT
+#include <mutex> // NOLINT
+#endif // GTEST_IS_THREADSAFE
+
+// GTEST_API_ qualifies all symbols that must be exported. The definitions below
+// are guarded by #ifndef to give embedders a chance to define GTEST_API_ in
+// gtest/internal/custom/gtest-port.h
+#ifndef GTEST_API_
+
+#ifdef _MSC_VER
+#if GTEST_LINKED_AS_SHARED_LIBRARY
+#define GTEST_API_ __declspec(dllimport)
+#elif GTEST_CREATE_SHARED_LIBRARY
+#define GTEST_API_ __declspec(dllexport)
+#endif
+#elif __GNUC__ >= 4 || defined(__clang__)
+#define GTEST_API_ __attribute__((visibility("default")))
+#endif // _MSC_VER
+
+#endif // GTEST_API_
+
+#ifndef GTEST_API_
+#define GTEST_API_
+#endif // GTEST_API_
+
+#ifndef GTEST_DEFAULT_DEATH_TEST_STYLE
+#define GTEST_DEFAULT_DEATH_TEST_STYLE "fast"
+#endif // GTEST_DEFAULT_DEATH_TEST_STYLE
+
+#ifdef __GNUC__
+// Ask the compiler to never inline a given function.
+#define GTEST_NO_INLINE_ __attribute__((noinline))
+#else
+#define GTEST_NO_INLINE_
+#endif
+
+#if defined(__clang__)
+// Nested ifs to avoid triggering MSVC warning.
+#if __has_attribute(disable_tail_calls)
+// Ask the compiler not to perform tail call optimization inside
+// the marked function.
+#define GTEST_NO_TAIL_CALL_ __attribute__((disable_tail_calls))
+#endif
+#elif __GNUC__
+#define GTEST_NO_TAIL_CALL_ \
+ __attribute__((optimize("no-optimize-sibling-calls")))
+#else
+#define GTEST_NO_TAIL_CALL_
+#endif
+
+// _LIBCPP_VERSION is defined by the libc++ library from the LLVM project.
+#if !defined(GTEST_HAS_CXXABI_H_)
+#if defined(__GLIBCXX__) || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))
+#define GTEST_HAS_CXXABI_H_ 1
+#else
+#define GTEST_HAS_CXXABI_H_ 0
+#endif
+#endif
+
+// A function level attribute to disable checking for use of uninitialized
+// memory when built with MemorySanitizer.
+#if defined(__clang__)
+#if __has_feature(memory_sanitizer)
+#define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ __attribute__((no_sanitize_memory))
+#else
+#define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
+#endif // __has_feature(memory_sanitizer)
+#else
+#define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
+#endif // __clang__
+
+// A function level attribute to disable AddressSanitizer instrumentation.
+#if defined(__clang__)
+#if __has_feature(address_sanitizer)
+#define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ \
+ __attribute__((no_sanitize_address))
+#else
+#define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+#endif // __has_feature(address_sanitizer)
+#else
+#define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+#endif // __clang__
+
+// A function level attribute to disable HWAddressSanitizer instrumentation.
+#if defined(__clang__)
+#if __has_feature(hwaddress_sanitizer)
+#define GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_ \
+ __attribute__((no_sanitize("hwaddress")))
+#else
+#define GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+#endif // __has_feature(hwaddress_sanitizer)
+#else
+#define GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+#endif // __clang__
+
+// A function level attribute to disable ThreadSanitizer instrumentation.
+#if defined(__clang__)
+#if __has_feature(thread_sanitizer)
+#define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ __attribute__((no_sanitize_thread))
+#else
+#define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
+#endif // __has_feature(thread_sanitizer)
+#else
+#define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
+#endif // __clang__
+
+namespace testing {
+
+class Message;
+
+// Legacy imports for backwards compatibility.
+// New code should use std:: names directly.
+using std::get;
+using std::make_tuple;
+using std::tuple;
+using std::tuple_element;
+using std::tuple_size;
+
+namespace internal {
+
+// A secret type that Google Test users don't know about. It has no
+// definition on purpose. Therefore it's impossible to create a
+// Secret object, which is what we want.
+class Secret;
+
+// A helper for suppressing warnings on constant condition. It just
+// returns 'condition'.
+GTEST_API_ bool IsTrue(bool condition);
+
+// Defines RE.
+
+#if GTEST_USES_RE2
+
+// This is almost `using RE = ::RE2`, except it is copy-constructible, and it
+// needs to disambiguate the `std::string`, `absl::string_view`, and `const
+// char*` constructors.
+class GTEST_API_ RE {
+ public:
+ RE(absl::string_view regex) : regex_(regex) {} // NOLINT
+ RE(const char* regex) : RE(absl::string_view(regex)) {} // NOLINT
+ RE(const std::string& regex) : RE(absl::string_view(regex)) {} // NOLINT
+ RE(const RE& other) : RE(other.pattern()) {}
+
+ const std::string& pattern() const { return regex_.pattern(); }
+
+ static bool FullMatch(absl::string_view str, const RE& re) {
+ return RE2::FullMatch(str, re.regex_);
+ }
+ static bool PartialMatch(absl::string_view str, const RE& re) {
+ return RE2::PartialMatch(str, re.regex_);
+ }
+
+ private:
+ RE2 regex_;
+};
+
+#elif GTEST_USES_POSIX_RE || GTEST_USES_SIMPLE_RE
+
+// A simple C++ wrapper for <regex.h>. It uses the POSIX Extended
+// Regular Expression syntax.
+class GTEST_API_ RE {
+ public:
+ // A copy constructor is required by the Standard to initialize object
+ // references from r-values.
+ RE(const RE& other) { Init(other.pattern()); }
+
+ // Constructs an RE from a string.
+ RE(const ::std::string& regex) { Init(regex.c_str()); } // NOLINT
+
+ RE(const char* regex) { Init(regex); } // NOLINT
+ ~RE();
+
+ // Returns the string representation of the regex.
+ const char* pattern() const { return pattern_; }
+
+ // FullMatch(str, re) returns true if and only if regular expression re
+ // matches the entire str.
+ // PartialMatch(str, re) returns true if and only if regular expression re
+ // matches a substring of str (including str itself).
+ static bool FullMatch(const ::std::string& str, const RE& re) {
+ return FullMatch(str.c_str(), re);
+ }
+ static bool PartialMatch(const ::std::string& str, const RE& re) {
+ return PartialMatch(str.c_str(), re);
+ }
+
+ static bool FullMatch(const char* str, const RE& re);
+ static bool PartialMatch(const char* str, const RE& re);
+
+ private:
+ void Init(const char* regex);
+ const char* pattern_;
+ bool is_valid_;
+
+#if GTEST_USES_POSIX_RE
+
+ regex_t full_regex_; // For FullMatch().
+ regex_t partial_regex_; // For PartialMatch().
+
+#else // GTEST_USES_SIMPLE_RE
+
+ const char* full_pattern_; // For FullMatch();
+
+#endif
+};
+
+#endif // ::testing::internal::RE implementation
+
+// Formats a source file path and a line number as they would appear
+// in an error message from the compiler used to compile this code.
+GTEST_API_ ::std::string FormatFileLocation(const char* file, int line);
+
+// Formats a file location for compiler-independent XML output.
+// Although this function is not platform dependent, we put it next to
+// FormatFileLocation in order to contrast the two functions.
+GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
+ int line);
+
+// Defines logging utilities:
+// GTEST_LOG_(severity) - logs messages at the specified severity level. The
+// message itself is streamed into the macro.
+// LogToStderr() - directs all log messages to stderr.
+// FlushInfoLog() - flushes informational log messages.
+
+enum GTestLogSeverity { GTEST_INFO, GTEST_WARNING, GTEST_ERROR, GTEST_FATAL };
+
+// Formats log entry severity, provides a stream object for streaming the
+// log message, and terminates the message with a newline when going out of
+// scope.
+class GTEST_API_ GTestLog {
+ public:
+ GTestLog(GTestLogSeverity severity, const char* file, int line);
+
+ // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
+ ~GTestLog();
+
+ ::std::ostream& GetStream() { return ::std::cerr; }
+
+ private:
+ const GTestLogSeverity severity_;
+
+ GTestLog(const GTestLog&) = delete;
+ GTestLog& operator=(const GTestLog&) = delete;
+};
+
+#if !defined(GTEST_LOG_)
+
+#define GTEST_LOG_(severity) \
+ ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \
+ __FILE__, __LINE__) \
+ .GetStream()
+
+inline void LogToStderr() {}
+inline void FlushInfoLog() { fflush(nullptr); }
+
+#endif // !defined(GTEST_LOG_)
+
+#if !defined(GTEST_CHECK_)
+// INTERNAL IMPLEMENTATION - DO NOT USE.
+//
+// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition
+// is not satisfied.
+// Synopsis:
+// GTEST_CHECK_(boolean_condition);
+// or
+// GTEST_CHECK_(boolean_condition) << "Additional message";
+//
+// This checks the condition and if the condition is not satisfied
+// it prints message about the condition violation, including the
+// condition itself, plus additional message streamed into it, if any,
+// and then it aborts the program. It aborts the program irrespective of
+// whether it is built in the debug mode or not.
+#define GTEST_CHECK_(condition) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::IsTrue(condition)) \
+ ; \
+ else \
+ GTEST_LOG_(FATAL) << "Condition " #condition " failed. "
+#endif // !defined(GTEST_CHECK_)
+
+// An all-mode assert to verify that the given POSIX-style function
+// call returns 0 (indicating success). Known limitation: this
+// doesn't expand to a balanced 'if' statement, so enclose the macro
+// in {} if you need to use it as the only statement in an 'if'
+// branch.
+#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \
+ if (const int gtest_error = (posix_call)) \
+ GTEST_LOG_(FATAL) << #posix_call << "failed with error " << gtest_error
+
+// Transforms "T" into "const T&" according to standard reference collapsing
+// rules (this is only needed as a backport for C++98 compilers that do not
+// support reference collapsing). Specifically, it transforms:
+//
+// char ==> const char&
+// const char ==> const char&
+// char& ==> char&
+// const char& ==> const char&
+//
+// Note that the non-const reference will not have "const" added. This is
+// standard, and necessary so that "T" can always bind to "const T&".
+template <typename T>
+struct ConstRef {
+ typedef const T& type;
+};
+template <typename T>
+struct ConstRef<T&> {
+ typedef T& type;
+};
+
+// The argument T must depend on some template parameters.
+#define GTEST_REFERENCE_TO_CONST_(T) \
+ typename ::testing::internal::ConstRef<T>::type
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Use ImplicitCast_ as a safe version of static_cast for upcasting in
+// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a
+// const Foo*). When you use ImplicitCast_, the compiler checks that
+// the cast is safe. Such explicit ImplicitCast_s are necessary in
+// surprisingly many situations where C++ demands an exact type match
+// instead of an argument type convertible to a target type.
+//
+// The syntax for using ImplicitCast_ is the same as for static_cast:
+//
+// ImplicitCast_<ToType>(expr)
+//
+// ImplicitCast_ would have been part of the C++ standard library,
+// but the proposal was submitted too late. It will probably make
+// its way into the language in the future.
+//
+// This relatively ugly name is intentional. It prevents clashes with
+// similar functions users may have (e.g., implicit_cast). The internal
+// namespace alone is not enough because the function can be found by ADL.
+template <typename To>
+inline To ImplicitCast_(To x) {
+ return x;
+}
+
+// When you upcast (that is, cast a pointer from type Foo to type
+// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts
+// always succeed. When you downcast (that is, cast a pointer from
+// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
+// how do you know the pointer is really of type SubclassOfFoo? It
+// could be a bare Foo, or of type DifferentSubclassOfFoo. Thus,
+// when you downcast, you should use this macro. In debug mode, we
+// use dynamic_cast<> to double-check the downcast is legal (we die
+// if it's not). In normal mode, we do the efficient static_cast<>
+// instead. Thus, it's important to test in debug mode to make sure
+// the cast is legal!
+// This is the only place in the code we should use dynamic_cast<>.
+// In particular, you SHOULDN'T be using dynamic_cast<> in order to
+// do RTTI (eg code like this:
+// if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
+// if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
+// You should design the code some other way not to need this.
+//
+// This relatively ugly name is intentional. It prevents clashes with
+// similar functions users may have (e.g., down_cast). The internal
+// namespace alone is not enough because the function can be found by ADL.
+template <typename To, typename From> // use like this: DownCast_<T*>(foo);
+inline To DownCast_(From* f) { // so we only accept pointers
+ // Ensures that To is a sub-type of From *. This test is here only
+ // for compile-time type checking, and has no overhead in an
+ // optimized build at run-time, as it will be optimized away
+ // completely.
+ GTEST_INTENTIONAL_CONST_COND_PUSH_()
+ if (false) {
+ GTEST_INTENTIONAL_CONST_COND_POP_()
+ const To to = nullptr;
+ ::testing::internal::ImplicitCast_<From*>(to);
+ }
+
+#if GTEST_HAS_RTTI
+ // RTTI: debug mode only!
+ GTEST_CHECK_(f == nullptr || dynamic_cast<To>(f) != nullptr);
+#endif
+ return static_cast<To>(f);
+}
+
+// Downcasts the pointer of type Base to Derived.
+// Derived must be a subclass of Base. The parameter MUST
+// point to a class of type Derived, not any subclass of it.
+// When RTTI is available, the function performs a runtime
+// check to enforce this.
+template <class Derived, class Base>
+Derived* CheckedDowncastToActualType(Base* base) {
+#if GTEST_HAS_RTTI
+ GTEST_CHECK_(typeid(*base) == typeid(Derived));
+#endif
+
+#if GTEST_HAS_DOWNCAST_
+ return ::down_cast<Derived*>(base);
+#elif GTEST_HAS_RTTI
+ return dynamic_cast<Derived*>(base); // NOLINT
+#else
+ return static_cast<Derived*>(base); // Poor man's downcast.
+#endif
+}
+
+#if GTEST_HAS_STREAM_REDIRECTION
+
+// Defines the stderr capturer:
+// CaptureStdout - starts capturing stdout.
+// GetCapturedStdout - stops capturing stdout and returns the captured string.
+// CaptureStderr - starts capturing stderr.
+// GetCapturedStderr - stops capturing stderr and returns the captured string.
+//
+GTEST_API_ void CaptureStdout();
+GTEST_API_ std::string GetCapturedStdout();
+GTEST_API_ void CaptureStderr();
+GTEST_API_ std::string GetCapturedStderr();
+
+#endif // GTEST_HAS_STREAM_REDIRECTION
+// Returns the size (in bytes) of a file.
+GTEST_API_ size_t GetFileSize(FILE* file);
+
+// Reads the entire content of a file as a string.
+GTEST_API_ std::string ReadEntireFile(FILE* file);
+
+// All command line arguments.
+GTEST_API_ std::vector<std::string> GetArgvs();
+
+#if GTEST_HAS_DEATH_TEST
+
+std::vector<std::string> GetInjectableArgvs();
+// Deprecated: pass the args vector by value instead.
+void SetInjectableArgvs(const std::vector<std::string>* new_argvs);
+void SetInjectableArgvs(const std::vector<std::string>& new_argvs);
+void ClearInjectableArgvs();
+
+#endif // GTEST_HAS_DEATH_TEST
+
+// Defines synchronization primitives.
+#if GTEST_IS_THREADSAFE
+
+#if GTEST_OS_WINDOWS
+// Provides leak-safe Windows kernel handle ownership.
+// Used in death tests and in threading support.
+class GTEST_API_ AutoHandle {
+ public:
+ // Assume that Win32 HANDLE type is equivalent to void*. Doing so allows us to
+ // avoid including <windows.h> in this header file. Including <windows.h> is
+ // undesirable because it defines a lot of symbols and macros that tend to
+ // conflict with client code. This assumption is verified by
+ // WindowsTypesTest.HANDLEIsVoidStar.
+ typedef void* Handle;
+ AutoHandle();
+ explicit AutoHandle(Handle handle);
+
+ ~AutoHandle();
+
+ Handle Get() const;
+ void Reset();
+ void Reset(Handle handle);
+
+ private:
+ // Returns true if and only if the handle is a valid handle object that can be
+ // closed.
+ bool IsCloseable() const;
+
+ Handle handle_;
+
+ AutoHandle(const AutoHandle&) = delete;
+ AutoHandle& operator=(const AutoHandle&) = delete;
+};
+#endif
+
+#if GTEST_HAS_NOTIFICATION_
+// Notification has already been imported into the namespace.
+// Nothing to do here.
+
+#else
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// Allows a controller thread to pause execution of newly created
+// threads until notified. Instances of this class must be created
+// and destroyed in the controller thread.
+//
+// This class is only for testing Google Test's own constructs. Do not
+// use it in user tests, either directly or indirectly.
+// TODO(b/203539622): Replace unconditionally with absl::Notification.
+class GTEST_API_ Notification {
+ public:
+ Notification() : notified_(false) {}
+ Notification(const Notification&) = delete;
+ Notification& operator=(const Notification&) = delete;
+
+ // Notifies all threads created with this notification to start. Must
+ // be called from the controller thread.
+ void Notify() {
+ std::lock_guard<std::mutex> lock(mu_);
+ notified_ = true;
+ cv_.notify_all();
+ }
+
+ // Blocks until the controller thread notifies. Must be called from a test
+ // thread.
+ void WaitForNotification() {
+ std::unique_lock<std::mutex> lock(mu_);
+ cv_.wait(lock, [this]() { return notified_; });
+ }
+
+ private:
+ std::mutex mu_;
+ std::condition_variable cv_;
+ bool notified_;
+};
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+#endif // GTEST_HAS_NOTIFICATION_
+
+// On MinGW, we can have both GTEST_OS_WINDOWS and GTEST_HAS_PTHREAD
+// defined, but we don't want to use MinGW's pthreads implementation, which
+// has conformance problems with some versions of the POSIX standard.
+#if GTEST_HAS_PTHREAD && !GTEST_OS_WINDOWS_MINGW
+
+// As a C-function, ThreadFuncWithCLinkage cannot be templated itself.
+// Consequently, it cannot select a correct instantiation of ThreadWithParam
+// in order to call its Run(). Introducing ThreadWithParamBase as a
+// non-templated base class for ThreadWithParam allows us to bypass this
+// problem.
+class ThreadWithParamBase {
+ public:
+ virtual ~ThreadWithParamBase() {}
+ virtual void Run() = 0;
+};
+
+// pthread_create() accepts a pointer to a function type with the C linkage.
+// According to the Standard (7.5/1), function types with different linkages
+// are different even if they are otherwise identical. Some compilers (for
+// example, SunStudio) treat them as different types. Since class methods
+// cannot be defined with C-linkage we need to define a free C-function to
+// pass into pthread_create().
+extern "C" inline void* ThreadFuncWithCLinkage(void* thread) {
+ static_cast<ThreadWithParamBase*>(thread)->Run();
+ return nullptr;
+}
+
+// Helper class for testing Google Test's multi-threading constructs.
+// To use it, write:
+//
+// void ThreadFunc(int param) { /* Do things with param */ }
+// Notification thread_can_start;
+// ...
+// // The thread_can_start parameter is optional; you can supply NULL.
+// ThreadWithParam<int> thread(&ThreadFunc, 5, &thread_can_start);
+// thread_can_start.Notify();
+//
+// These classes are only for testing Google Test's own constructs. Do
+// not use them in user tests, either directly or indirectly.
+template <typename T>
+class ThreadWithParam : public ThreadWithParamBase {
+ public:
+ typedef void UserThreadFunc(T);
+
+ ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start)
+ : func_(func),
+ param_(param),
+ thread_can_start_(thread_can_start),
+ finished_(false) {
+ ThreadWithParamBase* const base = this;
+ // The thread can be created only after all fields except thread_
+ // have been initialized.
+ GTEST_CHECK_POSIX_SUCCESS_(
+ pthread_create(&thread_, nullptr, &ThreadFuncWithCLinkage, base));
+ }
+ ~ThreadWithParam() override { Join(); }
+
+ void Join() {
+ if (!finished_) {
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, nullptr));
+ finished_ = true;
+ }
+ }
+
+ void Run() override {
+ if (thread_can_start_ != nullptr) thread_can_start_->WaitForNotification();
+ func_(param_);
+ }
+
+ private:
+ UserThreadFunc* const func_; // User-supplied thread function.
+ const T param_; // User-supplied parameter to the thread function.
+ // When non-NULL, used to block execution until the controller thread
+ // notifies.
+ Notification* const thread_can_start_;
+ bool finished_; // true if and only if we know that the thread function has
+ // finished.
+ pthread_t thread_; // The native thread object.
+
+ ThreadWithParam(const ThreadWithParam&) = delete;
+ ThreadWithParam& operator=(const ThreadWithParam&) = delete;
+};
+#endif // !GTEST_OS_WINDOWS && GTEST_HAS_PTHREAD ||
+ // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
+
+#if GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
+// Mutex and ThreadLocal have already been imported into the namespace.
+// Nothing to do here.
+
+#elif GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
+
+// Mutex implements mutex on Windows platforms. It is used in conjunction
+// with class MutexLock:
+//
+// Mutex mutex;
+// ...
+// MutexLock lock(&mutex); // Acquires the mutex and releases it at the
+// // end of the current scope.
+//
+// A static Mutex *must* be defined or declared using one of the following
+// macros:
+// GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex);
+// GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex);
+//
+// (A non-static Mutex is defined/declared in the usual way).
+class GTEST_API_ Mutex {
+ public:
+ enum MutexType { kStatic = 0, kDynamic = 1 };
+ // We rely on kStaticMutex being 0 as it is to what the linker initializes
+ // type_ in static mutexes. critical_section_ will be initialized lazily
+ // in ThreadSafeLazyInit().
+ enum StaticConstructorSelector { kStaticMutex = 0 };
+
+ // This constructor intentionally does nothing. It relies on type_ being
+ // statically initialized to 0 (effectively setting it to kStatic) and on
+ // ThreadSafeLazyInit() to lazily initialize the rest of the members.
+ explicit Mutex(StaticConstructorSelector /*dummy*/) {}
+
+ Mutex();
+ ~Mutex();
+
+ void Lock();
+
+ void Unlock();
+
+ // Does nothing if the current thread holds the mutex. Otherwise, crashes
+ // with high probability.
+ void AssertHeld();
+
+ private:
+ // Initializes owner_thread_id_ and critical_section_ in static mutexes.
+ void ThreadSafeLazyInit();
+
+ // Per https://blogs.msdn.microsoft.com/oldnewthing/20040223-00/?p=40503,
+ // we assume that 0 is an invalid value for thread IDs.
+ unsigned int owner_thread_id_;
+
+ // For static mutexes, we rely on these members being initialized to zeros
+ // by the linker.
+ MutexType type_;
+ long critical_section_init_phase_; // NOLINT
+ GTEST_CRITICAL_SECTION* critical_section_;
+
+ Mutex(const Mutex&) = delete;
+ Mutex& operator=(const Mutex&) = delete;
+};
+
+#define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
+ extern ::testing::internal::Mutex mutex
+
+#define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
+ ::testing::internal::Mutex mutex(::testing::internal::Mutex::kStaticMutex)
+
+// We cannot name this class MutexLock because the ctor declaration would
+// conflict with a macro named MutexLock, which is defined on some
+// platforms. That macro is used as a defensive measure to prevent against
+// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than
+// "MutexLock l(&mu)". Hence the typedef trick below.
+class GTestMutexLock {
+ public:
+ explicit GTestMutexLock(Mutex* mutex) : mutex_(mutex) { mutex_->Lock(); }
+
+ ~GTestMutexLock() { mutex_->Unlock(); }
+
+ private:
+ Mutex* const mutex_;
+
+ GTestMutexLock(const GTestMutexLock&) = delete;
+ GTestMutexLock& operator=(const GTestMutexLock&) = delete;
+};
+
+typedef GTestMutexLock MutexLock;
+
+// Base class for ValueHolder<T>. Allows a caller to hold and delete a value
+// without knowing its type.
+class ThreadLocalValueHolderBase {
+ public:
+ virtual ~ThreadLocalValueHolderBase() {}
+};
+
+// Provides a way for a thread to send notifications to a ThreadLocal
+// regardless of its parameter type.
+class ThreadLocalBase {
+ public:
+ // Creates a new ValueHolder<T> object holding a default value passed to
+ // this ThreadLocal<T>'s constructor and returns it. It is the caller's
+ // responsibility not to call this when the ThreadLocal<T> instance already
+ // has a value on the current thread.
+ virtual ThreadLocalValueHolderBase* NewValueForCurrentThread() const = 0;
+
+ protected:
+ ThreadLocalBase() {}
+ virtual ~ThreadLocalBase() {}
+
+ private:
+ ThreadLocalBase(const ThreadLocalBase&) = delete;
+ ThreadLocalBase& operator=(const ThreadLocalBase&) = delete;
+};
+
+// Maps a thread to a set of ThreadLocals that have values instantiated on that
+// thread and notifies them when the thread exits. A ThreadLocal instance is
+// expected to persist until all threads it has values on have terminated.
+class GTEST_API_ ThreadLocalRegistry {
+ public:
+ // Registers thread_local_instance as having value on the current thread.
+ // Returns a value that can be used to identify the thread from other threads.
+ static ThreadLocalValueHolderBase* GetValueOnCurrentThread(
+ const ThreadLocalBase* thread_local_instance);
+
+ // Invoked when a ThreadLocal instance is destroyed.
+ static void OnThreadLocalDestroyed(
+ const ThreadLocalBase* thread_local_instance);
+};
+
+class GTEST_API_ ThreadWithParamBase {
+ public:
+ void Join();
+
+ protected:
+ class Runnable {
+ public:
+ virtual ~Runnable() {}
+ virtual void Run() = 0;
+ };
+
+ ThreadWithParamBase(Runnable* runnable, Notification* thread_can_start);
+ virtual ~ThreadWithParamBase();
+
+ private:
+ AutoHandle thread_;
+};
+
+// Helper class for testing Google Test's multi-threading constructs.
+template <typename T>
+class ThreadWithParam : public ThreadWithParamBase {
+ public:
+ typedef void UserThreadFunc(T);
+
+ ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start)
+ : ThreadWithParamBase(new RunnableImpl(func, param), thread_can_start) {}
+ virtual ~ThreadWithParam() {}
+
+ private:
+ class RunnableImpl : public Runnable {
+ public:
+ RunnableImpl(UserThreadFunc* func, T param) : func_(func), param_(param) {}
+ virtual ~RunnableImpl() {}
+ virtual void Run() { func_(param_); }
+
+ private:
+ UserThreadFunc* const func_;
+ const T param_;
+
+ RunnableImpl(const RunnableImpl&) = delete;
+ RunnableImpl& operator=(const RunnableImpl&) = delete;
+ };
+
+ ThreadWithParam(const ThreadWithParam&) = delete;
+ ThreadWithParam& operator=(const ThreadWithParam&) = delete;
+};
+
+// Implements thread-local storage on Windows systems.
+//
+// // Thread 1
+// ThreadLocal<int> tl(100); // 100 is the default value for each thread.
+//
+// // Thread 2
+// tl.set(150); // Changes the value for thread 2 only.
+// EXPECT_EQ(150, tl.get());
+//
+// // Thread 1
+// EXPECT_EQ(100, tl.get()); // In thread 1, tl has the original value.
+// tl.set(200);
+// EXPECT_EQ(200, tl.get());
+//
+// The template type argument T must have a public copy constructor.
+// In addition, the default ThreadLocal constructor requires T to have
+// a public default constructor.
+//
+// The users of a TheadLocal instance have to make sure that all but one
+// threads (including the main one) using that instance have exited before
+// destroying it. Otherwise, the per-thread objects managed for them by the
+// ThreadLocal instance are not guaranteed to be destroyed on all platforms.
+//
+// Google Test only uses global ThreadLocal objects. That means they
+// will die after main() has returned. Therefore, no per-thread
+// object managed by Google Test will be leaked as long as all threads
+// using Google Test have exited when main() returns.
+template <typename T>
+class ThreadLocal : public ThreadLocalBase {
+ public:
+ ThreadLocal() : default_factory_(new DefaultValueHolderFactory()) {}
+ explicit ThreadLocal(const T& value)
+ : default_factory_(new InstanceValueHolderFactory(value)) {}
+
+ ~ThreadLocal() override { ThreadLocalRegistry::OnThreadLocalDestroyed(this); }
+
+ T* pointer() { return GetOrCreateValue(); }
+ const T* pointer() const { return GetOrCreateValue(); }
+ const T& get() const { return *pointer(); }
+ void set(const T& value) { *pointer() = value; }
+
+ private:
+ // Holds a value of T. Can be deleted via its base class without the caller
+ // knowing the type of T.
+ class ValueHolder : public ThreadLocalValueHolderBase {
+ public:
+ ValueHolder() : value_() {}
+ explicit ValueHolder(const T& value) : value_(value) {}
+
+ T* pointer() { return &value_; }
+
+ private:
+ T value_;
+ ValueHolder(const ValueHolder&) = delete;
+ ValueHolder& operator=(const ValueHolder&) = delete;
+ };
+
+ T* GetOrCreateValue() const {
+ return static_cast<ValueHolder*>(
+ ThreadLocalRegistry::GetValueOnCurrentThread(this))
+ ->pointer();
+ }
+
+ ThreadLocalValueHolderBase* NewValueForCurrentThread() const override {
+ return default_factory_->MakeNewHolder();
+ }
+
+ class ValueHolderFactory {
+ public:
+ ValueHolderFactory() {}
+ virtual ~ValueHolderFactory() {}
+ virtual ValueHolder* MakeNewHolder() const = 0;
+
+ private:
+ ValueHolderFactory(const ValueHolderFactory&) = delete;
+ ValueHolderFactory& operator=(const ValueHolderFactory&) = delete;
+ };
+
+ class DefaultValueHolderFactory : public ValueHolderFactory {
+ public:
+ DefaultValueHolderFactory() {}
+ ValueHolder* MakeNewHolder() const override { return new ValueHolder(); }
+
+ private:
+ DefaultValueHolderFactory(const DefaultValueHolderFactory&) = delete;
+ DefaultValueHolderFactory& operator=(const DefaultValueHolderFactory&) =
+ delete;
+ };
+
+ class InstanceValueHolderFactory : public ValueHolderFactory {
+ public:
+ explicit InstanceValueHolderFactory(const T& value) : value_(value) {}
+ ValueHolder* MakeNewHolder() const override {
+ return new ValueHolder(value_);
+ }
+
+ private:
+ const T value_; // The value for each thread.
+
+ InstanceValueHolderFactory(const InstanceValueHolderFactory&) = delete;
+ InstanceValueHolderFactory& operator=(const InstanceValueHolderFactory&) =
+ delete;
+ };
+
+ std::unique_ptr<ValueHolderFactory> default_factory_;
+
+ ThreadLocal(const ThreadLocal&) = delete;
+ ThreadLocal& operator=(const ThreadLocal&) = delete;
+};
+
+#elif GTEST_HAS_PTHREAD
+
+// MutexBase and Mutex implement mutex on pthreads-based platforms.
+class MutexBase {
+ public:
+ // Acquires this mutex.
+ void Lock() {
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_));
+ owner_ = pthread_self();
+ has_owner_ = true;
+ }
+
+ // Releases this mutex.
+ void Unlock() {
+ // Since the lock is being released the owner_ field should no longer be
+ // considered valid. We don't protect writing to has_owner_ here, as it's
+ // the caller's responsibility to ensure that the current thread holds the
+ // mutex when this is called.
+ has_owner_ = false;
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_));
+ }
+
+ // Does nothing if the current thread holds the mutex. Otherwise, crashes
+ // with high probability.
+ void AssertHeld() const {
+ GTEST_CHECK_(has_owner_ && pthread_equal(owner_, pthread_self()))
+ << "The current thread is not holding the mutex @" << this;
+ }
+
+ // A static mutex may be used before main() is entered. It may even
+ // be used before the dynamic initialization stage. Therefore we
+ // must be able to initialize a static mutex object at link time.
+ // This means MutexBase has to be a POD and its member variables
+ // have to be public.
+ public:
+ pthread_mutex_t mutex_; // The underlying pthread mutex.
+ // has_owner_ indicates whether the owner_ field below contains a valid thread
+ // ID and is therefore safe to inspect (e.g., to use in pthread_equal()). All
+ // accesses to the owner_ field should be protected by a check of this field.
+ // An alternative might be to memset() owner_ to all zeros, but there's no
+ // guarantee that a zero'd pthread_t is necessarily invalid or even different
+ // from pthread_self().
+ bool has_owner_;
+ pthread_t owner_; // The thread holding the mutex.
+};
+
+// Forward-declares a static mutex.
+#define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
+ extern ::testing::internal::MutexBase mutex
+
+// Defines and statically (i.e. at link time) initializes a static mutex.
+// The initialization list here does not explicitly initialize each field,
+// instead relying on default initialization for the unspecified fields. In
+// particular, the owner_ field (a pthread_t) is not explicitly initialized.
+// This allows initialization to work whether pthread_t is a scalar or struct.
+// The flag -Wmissing-field-initializers must not be specified for this to work.
+#define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
+ ::testing::internal::MutexBase mutex = {PTHREAD_MUTEX_INITIALIZER, false, 0}
+
+// The Mutex class can only be used for mutexes created at runtime. It
+// shares its API with MutexBase otherwise.
+class Mutex : public MutexBase {
+ public:
+ Mutex() {
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, nullptr));
+ has_owner_ = false;
+ }
+ ~Mutex() { GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_)); }
+
+ private:
+ Mutex(const Mutex&) = delete;
+ Mutex& operator=(const Mutex&) = delete;
+};
+
+// We cannot name this class MutexLock because the ctor declaration would
+// conflict with a macro named MutexLock, which is defined on some
+// platforms. That macro is used as a defensive measure to prevent against
+// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than
+// "MutexLock l(&mu)". Hence the typedef trick below.
+class GTestMutexLock {
+ public:
+ explicit GTestMutexLock(MutexBase* mutex) : mutex_(mutex) { mutex_->Lock(); }
+
+ ~GTestMutexLock() { mutex_->Unlock(); }
+
+ private:
+ MutexBase* const mutex_;
+
+ GTestMutexLock(const GTestMutexLock&) = delete;
+ GTestMutexLock& operator=(const GTestMutexLock&) = delete;
+};
+
+typedef GTestMutexLock MutexLock;
+
+// Helpers for ThreadLocal.
+
+// pthread_key_create() requires DeleteThreadLocalValue() to have
+// C-linkage. Therefore it cannot be templatized to access
+// ThreadLocal<T>. Hence the need for class
+// ThreadLocalValueHolderBase.
+class ThreadLocalValueHolderBase {
+ public:
+ virtual ~ThreadLocalValueHolderBase() {}
+};
+
+// Called by pthread to delete thread-local data stored by
+// pthread_setspecific().
+extern "C" inline void DeleteThreadLocalValue(void* value_holder) {
+ delete static_cast<ThreadLocalValueHolderBase*>(value_holder);
+}
+
+// Implements thread-local storage on pthreads-based systems.
+template <typename T>
+class GTEST_API_ ThreadLocal {
+ public:
+ ThreadLocal()
+ : key_(CreateKey()), default_factory_(new DefaultValueHolderFactory()) {}
+ explicit ThreadLocal(const T& value)
+ : key_(CreateKey()),
+ default_factory_(new InstanceValueHolderFactory(value)) {}
+
+ ~ThreadLocal() {
+ // Destroys the managed object for the current thread, if any.
+ DeleteThreadLocalValue(pthread_getspecific(key_));
+
+ // Releases resources associated with the key. This will *not*
+ // delete managed objects for other threads.
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_));
+ }
+
+ T* pointer() { return GetOrCreateValue(); }
+ const T* pointer() const { return GetOrCreateValue(); }
+ const T& get() const { return *pointer(); }
+ void set(const T& value) { *pointer() = value; }
+
+ private:
+ // Holds a value of type T.
+ class ValueHolder : public ThreadLocalValueHolderBase {
+ public:
+ ValueHolder() : value_() {}
+ explicit ValueHolder(const T& value) : value_(value) {}
+
+ T* pointer() { return &value_; }
+
+ private:
+ T value_;
+ ValueHolder(const ValueHolder&) = delete;
+ ValueHolder& operator=(const ValueHolder&) = delete;
+ };
+
+ static pthread_key_t CreateKey() {
+ pthread_key_t key;
+ // When a thread exits, DeleteThreadLocalValue() will be called on
+ // the object managed for that thread.
+ GTEST_CHECK_POSIX_SUCCESS_(
+ pthread_key_create(&key, &DeleteThreadLocalValue));
+ return key;
+ }
+
+ T* GetOrCreateValue() const {
+ ThreadLocalValueHolderBase* const holder =
+ static_cast<ThreadLocalValueHolderBase*>(pthread_getspecific(key_));
+ if (holder != nullptr) {
+ return CheckedDowncastToActualType<ValueHolder>(holder)->pointer();
+ }
+
+ ValueHolder* const new_holder = default_factory_->MakeNewHolder();
+ ThreadLocalValueHolderBase* const holder_base = new_holder;
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base));
+ return new_holder->pointer();
+ }
+
+ class ValueHolderFactory {
+ public:
+ ValueHolderFactory() {}
+ virtual ~ValueHolderFactory() {}
+ virtual ValueHolder* MakeNewHolder() const = 0;
+
+ private:
+ ValueHolderFactory(const ValueHolderFactory&) = delete;
+ ValueHolderFactory& operator=(const ValueHolderFactory&) = delete;
+ };
+
+ class DefaultValueHolderFactory : public ValueHolderFactory {
+ public:
+ DefaultValueHolderFactory() {}
+ ValueHolder* MakeNewHolder() const override { return new ValueHolder(); }
+
+ private:
+ DefaultValueHolderFactory(const DefaultValueHolderFactory&) = delete;
+ DefaultValueHolderFactory& operator=(const DefaultValueHolderFactory&) =
+ delete;
+ };
+
+ class InstanceValueHolderFactory : public ValueHolderFactory {
+ public:
+ explicit InstanceValueHolderFactory(const T& value) : value_(value) {}
+ ValueHolder* MakeNewHolder() const override {
+ return new ValueHolder(value_);
+ }
+
+ private:
+ const T value_; // The value for each thread.
+
+ InstanceValueHolderFactory(const InstanceValueHolderFactory&) = delete;
+ InstanceValueHolderFactory& operator=(const InstanceValueHolderFactory&) =
+ delete;
+ };
+
+ // A key pthreads uses for looking up per-thread values.
+ const pthread_key_t key_;
+ std::unique_ptr<ValueHolderFactory> default_factory_;
+
+ ThreadLocal(const ThreadLocal&) = delete;
+ ThreadLocal& operator=(const ThreadLocal&) = delete;
+};
+
+#endif // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
+
+#else // GTEST_IS_THREADSAFE
+
+// A dummy implementation of synchronization primitives (mutex, lock,
+// and thread-local variable). Necessary for compiling Google Test where
+// mutex is not supported - using Google Test in multiple threads is not
+// supported on such platforms.
+
+class Mutex {
+ public:
+ Mutex() {}
+ void Lock() {}
+ void Unlock() {}
+ void AssertHeld() const {}
+};
+
+#define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
+ extern ::testing::internal::Mutex mutex
+
+#define GTEST_DEFINE_STATIC_MUTEX_(mutex) ::testing::internal::Mutex mutex
+
+// We cannot name this class MutexLock because the ctor declaration would
+// conflict with a macro named MutexLock, which is defined on some
+// platforms. That macro is used as a defensive measure to prevent against
+// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than
+// "MutexLock l(&mu)". Hence the typedef trick below.
+class GTestMutexLock {
+ public:
+ explicit GTestMutexLock(Mutex*) {} // NOLINT
+};
+
+typedef GTestMutexLock MutexLock;
+
+template <typename T>
+class GTEST_API_ ThreadLocal {
+ public:
+ ThreadLocal() : value_() {}
+ explicit ThreadLocal(const T& value) : value_(value) {}
+ T* pointer() { return &value_; }
+ const T* pointer() const { return &value_; }
+ const T& get() const { return value_; }
+ void set(const T& value) { value_ = value; }
+
+ private:
+ T value_;
+};
+
+#endif // GTEST_IS_THREADSAFE
+
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+GTEST_API_ size_t GetThreadCount();
+
+#if GTEST_OS_WINDOWS
+#define GTEST_PATH_SEP_ "\\"
+#define GTEST_HAS_ALT_PATH_SEP_ 1
+#else
+#define GTEST_PATH_SEP_ "/"
+#define GTEST_HAS_ALT_PATH_SEP_ 0
+#endif // GTEST_OS_WINDOWS
+
+// Utilities for char.
+
+// isspace(int ch) and friends accept an unsigned char or EOF. char
+// may be signed, depending on the compiler (or compiler flags).
+// Therefore we need to cast a char to unsigned char before calling
+// isspace(), etc.
+
+inline bool IsAlpha(char ch) {
+ return isalpha(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsAlNum(char ch) {
+ return isalnum(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsDigit(char ch) {
+ return isdigit(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsLower(char ch) {
+ return islower(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsSpace(char ch) {
+ return isspace(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsUpper(char ch) {
+ return isupper(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsXDigit(char ch) {
+ return isxdigit(static_cast<unsigned char>(ch)) != 0;
+}
+#ifdef __cpp_char8_t
+inline bool IsXDigit(char8_t ch) {
+ return isxdigit(static_cast<unsigned char>(ch)) != 0;
+}
+#endif
+inline bool IsXDigit(char16_t ch) {
+ const unsigned char low_byte = static_cast<unsigned char>(ch);
+ return ch == low_byte && isxdigit(low_byte) != 0;
+}
+inline bool IsXDigit(char32_t ch) {
+ const unsigned char low_byte = static_cast<unsigned char>(ch);
+ return ch == low_byte && isxdigit(low_byte) != 0;
+}
+inline bool IsXDigit(wchar_t ch) {
+ const unsigned char low_byte = static_cast<unsigned char>(ch);
+ return ch == low_byte && isxdigit(low_byte) != 0;
+}
+
+inline char ToLower(char ch) {
+ return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
+}
+inline char ToUpper(char ch) {
+ return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
+}
+
+inline std::string StripTrailingSpaces(std::string str) {
+ std::string::iterator it = str.end();
+ while (it != str.begin() && IsSpace(*--it)) it = str.erase(it);
+ return str;
+}
+
+// The testing::internal::posix namespace holds wrappers for common
+// POSIX functions. These wrappers hide the differences between
+// Windows/MSVC and POSIX systems. Since some compilers define these
+// standard functions as macros, the wrapper cannot have the same name
+// as the wrapped function.
+
+namespace posix {
+
+// Functions with a different name on Windows.
+
+#if GTEST_OS_WINDOWS
+
+typedef struct _stat StatStruct;
+
+#ifdef __BORLANDC__
+inline int DoIsATTY(int fd) { return isatty(fd); }
+inline int StrCaseCmp(const char* s1, const char* s2) {
+ return stricmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return strdup(src); }
+#else // !__BORLANDC__
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_ZOS || GTEST_OS_IOS || \
+ GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT || defined(ESP_PLATFORM)
+inline int DoIsATTY(int /* fd */) { return 0; }
+#else
+inline int DoIsATTY(int fd) { return _isatty(fd); }
+#endif // GTEST_OS_WINDOWS_MOBILE
+inline int StrCaseCmp(const char* s1, const char* s2) {
+ return _stricmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return _strdup(src); }
+#endif // __BORLANDC__
+
+#if GTEST_OS_WINDOWS_MOBILE
+inline int FileNo(FILE* file) { return reinterpret_cast<int>(_fileno(file)); }
+// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this
+// time and thus not defined there.
+#else
+inline int FileNo(FILE* file) { return _fileno(file); }
+inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); }
+inline int RmDir(const char* dir) { return _rmdir(dir); }
+inline bool IsDir(const StatStruct& st) { return (_S_IFDIR & st.st_mode) != 0; }
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+#elif GTEST_OS_ESP8266
+typedef struct stat StatStruct;
+
+inline int FileNo(FILE* file) { return fileno(file); }
+inline int DoIsATTY(int fd) { return isatty(fd); }
+inline int Stat(const char* path, StatStruct* buf) {
+ // stat function not implemented on ESP8266
+ return 0;
+}
+inline int StrCaseCmp(const char* s1, const char* s2) {
+ return strcasecmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return strdup(src); }
+inline int RmDir(const char* dir) { return rmdir(dir); }
+inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }
+
+#else
+
+typedef struct stat StatStruct;
+
+inline int FileNo(FILE* file) { return fileno(file); }
+inline int DoIsATTY(int fd) { return isatty(fd); }
+inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); }
+inline int StrCaseCmp(const char* s1, const char* s2) {
+ return strcasecmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return strdup(src); }
+inline int RmDir(const char* dir) { return rmdir(dir); }
+inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }
+
+#endif // GTEST_OS_WINDOWS
+
+inline int IsATTY(int fd) {
+ // DoIsATTY might change errno (for example ENOTTY in case you redirect stdout
+ // to a file on Linux), which is unexpected, so save the previous value, and
+ // restore it after the call.
+ int savedErrno = errno;
+ int isAttyValue = DoIsATTY(fd);
+ errno = savedErrno;
+
+ return isAttyValue;
+}
+
+// Functions deprecated by MSVC 8.0.
+
+GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
+
+// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and
+// StrError() aren't needed on Windows CE at this time and thus not
+// defined there.
+
+#if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && \
+ !GTEST_OS_WINDOWS_RT && !GTEST_OS_ESP8266 && !GTEST_OS_XTENSA
+inline int ChDir(const char* dir) { return chdir(dir); }
+#endif
+inline FILE* FOpen(const char* path, const char* mode) {
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
+ struct wchar_codecvt : public std::codecvt<wchar_t, char, std::mbstate_t> {};
+ std::wstring_convert<wchar_codecvt> converter;
+ std::wstring wide_path = converter.from_bytes(path);
+ std::wstring wide_mode = converter.from_bytes(mode);
+ return _wfopen(wide_path.c_str(), wide_mode.c_str());
+#else // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
+ return fopen(path, mode);
+#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
+}
+#if !GTEST_OS_WINDOWS_MOBILE
+inline FILE* FReopen(const char* path, const char* mode, FILE* stream) {
+ return freopen(path, mode, stream);
+}
+inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); }
+#endif
+inline int FClose(FILE* fp) { return fclose(fp); }
+#if !GTEST_OS_WINDOWS_MOBILE
+inline int Read(int fd, void* buf, unsigned int count) {
+ return static_cast<int>(read(fd, buf, count));
+}
+inline int Write(int fd, const void* buf, unsigned int count) {
+ return static_cast<int>(write(fd, buf, count));
+}
+inline int Close(int fd) { return close(fd); }
+inline const char* StrError(int errnum) { return strerror(errnum); }
+#endif
+inline const char* GetEnv(const char* name) {
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
+ GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_XTENSA
+ // We are on an embedded platform, which has no environment variables.
+ static_cast<void>(name); // To prevent 'unused argument' warning.
+ return nullptr;
+#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
+ // Environment variables which we programmatically clear will be set to the
+ // empty string rather than unset (NULL). Handle that case.
+ const char* const env = getenv(name);
+ return (env != nullptr && env[0] != '\0') ? env : nullptr;
+#else
+ return getenv(name);
+#endif
+}
+
+GTEST_DISABLE_MSC_DEPRECATED_POP_()
+
+#if GTEST_OS_WINDOWS_MOBILE
+// Windows CE has no C library. The abort() function is used in
+// several places in Google Test. This implementation provides a reasonable
+// imitation of standard behaviour.
+[[noreturn]] void Abort();
+#else
+[[noreturn]] inline void Abort() { abort(); }
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+} // namespace posix
+
+// MSVC "deprecates" snprintf and issues warnings wherever it is used. In
+// order to avoid these warnings, we need to use _snprintf or _snprintf_s on
+// MSVC-based platforms. We map the GTEST_SNPRINTF_ macro to the appropriate
+// function in order to achieve that. We use macro definition here because
+// snprintf is a variadic function.
+#if _MSC_VER && !GTEST_OS_WINDOWS_MOBILE
+// MSVC 2005 and above support variadic macros.
+#define GTEST_SNPRINTF_(buffer, size, format, ...) \
+ _snprintf_s(buffer, size, size, format, __VA_ARGS__)
+#elif defined(_MSC_VER)
+// Windows CE does not define _snprintf_s
+#define GTEST_SNPRINTF_ _snprintf
+#else
+#define GTEST_SNPRINTF_ snprintf
+#endif
+
+// The biggest signed integer type the compiler supports.
+//
+// long long is guaranteed to be at least 64-bits in C++11.
+using BiggestInt = long long; // NOLINT
+
+// The maximum number a BiggestInt can represent.
+constexpr BiggestInt kMaxBiggestInt = (std::numeric_limits<BiggestInt>::max)();
+
+// This template class serves as a compile-time function from size to
+// type. It maps a size in bytes to a primitive type with that
+// size. e.g.
+//
+// TypeWithSize<4>::UInt
+//
+// is typedef-ed to be unsigned int (unsigned integer made up of 4
+// bytes).
+//
+// Such functionality should belong to STL, but I cannot find it
+// there.
+//
+// Google Test uses this class in the implementation of floating-point
+// comparison.
+//
+// For now it only handles UInt (unsigned int) as that's all Google Test
+// needs. Other types can be easily added in the future if need
+// arises.
+template <size_t size>
+class TypeWithSize {
+ public:
+ // This prevents the user from using TypeWithSize<N> with incorrect
+ // values of N.
+ using UInt = void;
+};
+
+// The specialization for size 4.
+template <>
+class TypeWithSize<4> {
+ public:
+ using Int = std::int32_t;
+ using UInt = std::uint32_t;
+};
+
+// The specialization for size 8.
+template <>
+class TypeWithSize<8> {
+ public:
+ using Int = std::int64_t;
+ using UInt = std::uint64_t;
+};
+
+// Integer types of known sizes.
+using TimeInMillis = int64_t; // Represents time in milliseconds.
+
+// Utilities for command line flags and environment variables.
+
+// Macro for referencing flags.
+#if !defined(GTEST_FLAG)
+#define GTEST_FLAG_NAME_(name) gtest_##name
+#define GTEST_FLAG(name) FLAGS_gtest_##name
+#endif // !defined(GTEST_FLAG)
+
+// Pick a command line flags implementation.
+#if GTEST_HAS_ABSL
+
+// Macros for defining flags.
+#define GTEST_DEFINE_bool_(name, default_val, doc) \
+ ABSL_FLAG(bool, GTEST_FLAG_NAME_(name), default_val, doc)
+#define GTEST_DEFINE_int32_(name, default_val, doc) \
+ ABSL_FLAG(int32_t, GTEST_FLAG_NAME_(name), default_val, doc)
+#define GTEST_DEFINE_string_(name, default_val, doc) \
+ ABSL_FLAG(std::string, GTEST_FLAG_NAME_(name), default_val, doc)
+
+// Macros for declaring flags.
+#define GTEST_DECLARE_bool_(name) \
+ ABSL_DECLARE_FLAG(bool, GTEST_FLAG_NAME_(name))
+#define GTEST_DECLARE_int32_(name) \
+ ABSL_DECLARE_FLAG(int32_t, GTEST_FLAG_NAME_(name))
+#define GTEST_DECLARE_string_(name) \
+ ABSL_DECLARE_FLAG(std::string, GTEST_FLAG_NAME_(name))
+
+#define GTEST_FLAG_SAVER_ ::absl::FlagSaver
+
+#define GTEST_FLAG_GET(name) ::absl::GetFlag(GTEST_FLAG(name))
+#define GTEST_FLAG_SET(name, value) \
+ (void)(::absl::SetFlag(&GTEST_FLAG(name), value))
+#define GTEST_USE_OWN_FLAGFILE_FLAG_ 0
+
+#else // GTEST_HAS_ABSL
+
+// Macros for defining flags.
+#define GTEST_DEFINE_bool_(name, default_val, doc) \
+ namespace testing { \
+ GTEST_API_ bool GTEST_FLAG(name) = (default_val); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GTEST_DEFINE_int32_(name, default_val, doc) \
+ namespace testing { \
+ GTEST_API_ std::int32_t GTEST_FLAG(name) = (default_val); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GTEST_DEFINE_string_(name, default_val, doc) \
+ namespace testing { \
+ GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+
+// Macros for declaring flags.
+#define GTEST_DECLARE_bool_(name) \
+ namespace testing { \
+ GTEST_API_ extern bool GTEST_FLAG(name); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GTEST_DECLARE_int32_(name) \
+ namespace testing { \
+ GTEST_API_ extern std::int32_t GTEST_FLAG(name); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GTEST_DECLARE_string_(name) \
+ namespace testing { \
+ GTEST_API_ extern ::std::string GTEST_FLAG(name); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+
+#define GTEST_FLAG_SAVER_ ::testing::internal::GTestFlagSaver
+
+#define GTEST_FLAG_GET(name) ::testing::GTEST_FLAG(name)
+#define GTEST_FLAG_SET(name, value) (void)(::testing::GTEST_FLAG(name) = value)
+#define GTEST_USE_OWN_FLAGFILE_FLAG_ 1
+
+#endif // GTEST_HAS_ABSL
+
+// Thread annotations
+#if !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_)
+#define GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)
+#define GTEST_LOCK_EXCLUDED_(locks)
+#endif // !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_)
+
+// Parses 'str' for a 32-bit signed integer. If successful, writes the result
+// to *value and returns true; otherwise leaves *value unchanged and returns
+// false.
+GTEST_API_ bool ParseInt32(const Message& src_text, const char* str,
+ int32_t* value);
+
+// Parses a bool/int32_t/string from the environment variable
+// corresponding to the given Google Test flag.
+bool BoolFromGTestEnv(const char* flag, bool default_val);
+GTEST_API_ int32_t Int32FromGTestEnv(const char* flag, int32_t default_val);
+std::string OutputFlagAlsoCheckEnvVar();
+const char* StringFromGTestEnv(const char* flag, const char* default_val);
+
+} // namespace internal
+} // namespace testing
+
+#if !defined(GTEST_INTERNAL_DEPRECATED)
+
+// Internal Macro to mark an API deprecated, for googletest usage only
+// Usage: class GTEST_INTERNAL_DEPRECATED(message) MyClass or
+// GTEST_INTERNAL_DEPRECATED(message) <return_type> myFunction(); Every usage of
+// a deprecated entity will trigger a warning when compiled with
+// `-Wdeprecated-declarations` option (clang, gcc, any __GNUC__ compiler).
+// For msvc /W3 option will need to be used
+// Note that for 'other' compilers this macro evaluates to nothing to prevent
+// compilations errors.
+#if defined(_MSC_VER)
+#define GTEST_INTERNAL_DEPRECATED(message) __declspec(deprecated(message))
+#elif defined(__GNUC__)
+#define GTEST_INTERNAL_DEPRECATED(message) __attribute__((deprecated(message)))
+#else
+#define GTEST_INTERNAL_DEPRECATED(message)
+#endif
+
+#endif // !defined(GTEST_INTERNAL_DEPRECATED)
+
+#if GTEST_HAS_ABSL
+// Always use absl::any for UniversalPrinter<> specializations if googletest
+// is built with absl support.
+#define GTEST_INTERNAL_HAS_ANY 1
+#include "absl/types/any.h"
+namespace testing {
+namespace internal {
+using Any = ::absl::any;
+} // namespace internal
+} // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<any>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::any for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_ANY 1
+#include <any>
+namespace testing {
+namespace internal {
+using Any = ::std::any;
+} // namespace internal
+} // namespace testing
+// The case where absl is configured NOT to alias std::any is not
+// supported.
+#endif // __has_include(<any>) && __cplusplus >= 201703L
+#endif // __has_include
+#endif // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
+// Always use absl::optional for UniversalPrinter<> specializations if
+// googletest is built with absl support.
+#define GTEST_INTERNAL_HAS_OPTIONAL 1
+#include "absl/types/optional.h"
+namespace testing {
+namespace internal {
+template <typename T>
+using Optional = ::absl::optional<T>;
+inline ::absl::nullopt_t Nullopt() { return ::absl::nullopt; }
+} // namespace internal
+} // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<optional>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::optional for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_OPTIONAL 1
+#include <optional>
+namespace testing {
+namespace internal {
+template <typename T>
+using Optional = ::std::optional<T>;
+inline ::std::nullopt_t Nullopt() { return ::std::nullopt; }
+} // namespace internal
+} // namespace testing
+// The case where absl is configured NOT to alias std::optional is not
+// supported.
+#endif // __has_include(<optional>) && __cplusplus >= 201703L
+#endif // __has_include
+#endif // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
+// Always use absl::string_view for Matcher<> specializations if googletest
+// is built with absl support.
+#define GTEST_INTERNAL_HAS_STRING_VIEW 1
+#include "absl/strings/string_view.h"
+namespace testing {
+namespace internal {
+using StringView = ::absl::string_view;
+} // namespace internal
+} // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<string_view>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::string_view for Matcher<>
+// specializations.
+#define GTEST_INTERNAL_HAS_STRING_VIEW 1
+#include <string_view>
+namespace testing {
+namespace internal {
+using StringView = ::std::string_view;
+} // namespace internal
+} // namespace testing
+// The case where absl is configured NOT to alias std::string_view is not
+// supported.
+#endif // __has_include(<string_view>) && __cplusplus >= 201703L
+#endif // __has_include
+#endif // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
+// Always use absl::variant for UniversalPrinter<> specializations if googletest
+// is built with absl support.
+#define GTEST_INTERNAL_HAS_VARIANT 1
+#include "absl/types/variant.h"
+namespace testing {
+namespace internal {
+template <typename... T>
+using Variant = ::absl::variant<T...>;
+} // namespace internal
+} // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<variant>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::variant for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_VARIANT 1
+#include <variant>
+namespace testing {
+namespace internal {
+template <typename... T>
+using Variant = ::std::variant<T...>;
+} // namespace internal
+} // namespace testing
+// The case where absl is configured NOT to alias std::variant is not supported.
+#endif // __has_include(<variant>) && __cplusplus >= 201703L
+#endif // __has_include
+#endif // GTEST_HAS_ABSL
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-string.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-string.h
new file mode 100644
index 0000000000..cca2e1f2ad
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-string.h
@@ -0,0 +1,177 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file declares the String class and functions used internally by
+// Google Test. They are subject to change without notice. They should not used
+// by code external to Google Test.
+//
+// This header file is #included by gtest-internal.h.
+// It should not be #included by other files.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+
+#ifdef __BORLANDC__
+// string.h is not guaranteed to provide strcpy on C++ Builder.
+#include <mem.h>
+#endif
+
+#include <string.h>
+
+#include <cstdint>
+#include <string>
+
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+namespace internal {
+
+// String - an abstract class holding static string utilities.
+class GTEST_API_ String {
+ public:
+ // Static utility methods
+
+ // Clones a 0-terminated C string, allocating memory using new. The
+ // caller is responsible for deleting the return value using
+ // delete[]. Returns the cloned string, or NULL if the input is
+ // NULL.
+ //
+ // This is different from strdup() in string.h, which allocates
+ // memory using malloc().
+ static const char* CloneCString(const char* c_str);
+
+#if GTEST_OS_WINDOWS_MOBILE
+ // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
+ // able to pass strings to Win32 APIs on CE we need to convert them
+ // to 'Unicode', UTF-16.
+
+ // Creates a UTF-16 wide string from the given ANSI string, allocating
+ // memory using new. The caller is responsible for deleting the return
+ // value using delete[]. Returns the wide string, or NULL if the
+ // input is NULL.
+ //
+ // The wide string is created using the ANSI codepage (CP_ACP) to
+ // match the behaviour of the ANSI versions of Win32 calls and the
+ // C runtime.
+ static LPCWSTR AnsiToUtf16(const char* c_str);
+
+ // Creates an ANSI string from the given wide string, allocating
+ // memory using new. The caller is responsible for deleting the return
+ // value using delete[]. Returns the ANSI string, or NULL if the
+ // input is NULL.
+ //
+ // The returned string is created using the ANSI codepage (CP_ACP) to
+ // match the behaviour of the ANSI versions of Win32 calls and the
+ // C runtime.
+ static const char* Utf16ToAnsi(LPCWSTR utf16_str);
+#endif
+
+ // Compares two C strings. Returns true if and only if they have the same
+ // content.
+ //
+ // Unlike strcmp(), this function can handle NULL argument(s). A
+ // NULL C string is considered different to any non-NULL C string,
+ // including the empty string.
+ static bool CStringEquals(const char* lhs, const char* rhs);
+
+ // Converts a wide C string to a String using the UTF-8 encoding.
+ // NULL will be converted to "(null)". If an error occurred during
+ // the conversion, "(failed to convert from wide string)" is
+ // returned.
+ static std::string ShowWideCString(const wchar_t* wide_c_str);
+
+ // Compares two wide C strings. Returns true if and only if they have the
+ // same content.
+ //
+ // Unlike wcscmp(), this function can handle NULL argument(s). A
+ // NULL C string is considered different to any non-NULL C string,
+ // including the empty string.
+ static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);
+
+ // Compares two C strings, ignoring case. Returns true if and only if
+ // they have the same content.
+ //
+ // Unlike strcasecmp(), this function can handle NULL argument(s).
+ // A NULL C string is considered different to any non-NULL C string,
+ // including the empty string.
+ static bool CaseInsensitiveCStringEquals(const char* lhs, const char* rhs);
+
+ // Compares two wide C strings, ignoring case. Returns true if and only if
+ // they have the same content.
+ //
+ // Unlike wcscasecmp(), this function can handle NULL argument(s).
+ // A NULL C string is considered different to any non-NULL wide C string,
+ // including the empty string.
+ // NB: The implementations on different platforms slightly differ.
+ // On windows, this method uses _wcsicmp which compares according to LC_CTYPE
+ // environment variable. On GNU platform this method uses wcscasecmp
+ // which compares according to LC_CTYPE category of the current locale.
+ // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
+ // current locale.
+ static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
+ const wchar_t* rhs);
+
+ // Returns true if and only if the given string ends with the given suffix,
+ // ignoring case. Any string is considered to end with an empty suffix.
+ static bool EndsWithCaseInsensitive(const std::string& str,
+ const std::string& suffix);
+
+ // Formats an int value as "%02d".
+ static std::string FormatIntWidth2(int value); // "%02d" for width == 2
+
+ // Formats an int value to given width with leading zeros.
+ static std::string FormatIntWidthN(int value, int width);
+
+ // Formats an int value as "%X".
+ static std::string FormatHexInt(int value);
+
+ // Formats an int value as "%X".
+ static std::string FormatHexUInt32(uint32_t value);
+
+ // Formats a byte as "%02X".
+ static std::string FormatByte(unsigned char value);
+
+ private:
+ String(); // Not meant to be instantiated.
+}; // class String
+
+// Gets the content of the stringstream's buffer as an std::string. Each '\0'
+// character in the buffer is replaced with "\\0".
+GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);
+
+} // namespace internal
+} // namespace testing
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h
new file mode 100644
index 0000000000..6bc02a7de3
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h
@@ -0,0 +1,186 @@
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Type utilities needed for implementing typed and type-parameterized
+// tests.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+
+#include "gtest/internal/gtest-port.h"
+
+// #ifdef __GNUC__ is too general here. It is possible to use gcc without using
+// libstdc++ (which is where cxxabi.h comes from).
+#if GTEST_HAS_CXXABI_H_
+#include <cxxabi.h>
+#elif defined(__HP_aCC)
+#include <acxx_demangle.h>
+#endif // GTEST_HASH_CXXABI_H_
+
+namespace testing {
+namespace internal {
+
+// Canonicalizes a given name with respect to the Standard C++ Library.
+// This handles removing the inline namespace within `std` that is
+// used by various standard libraries (e.g., `std::__1`). Names outside
+// of namespace std are returned unmodified.
+inline std::string CanonicalizeForStdLibVersioning(std::string s) {
+ static const char prefix[] = "std::__";
+ if (s.compare(0, strlen(prefix), prefix) == 0) {
+ std::string::size_type end = s.find("::", strlen(prefix));
+ if (end != s.npos) {
+ // Erase everything between the initial `std` and the second `::`.
+ s.erase(strlen("std"), end - strlen("std"));
+ }
+ }
+ return s;
+}
+
+#if GTEST_HAS_RTTI
+// GetTypeName(const std::type_info&) returns a human-readable name of type T.
+inline std::string GetTypeName(const std::type_info& type) {
+ const char* const name = type.name();
+#if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
+ int status = 0;
+ // gcc's implementation of typeid(T).name() mangles the type name,
+ // so we have to demangle it.
+#if GTEST_HAS_CXXABI_H_
+ using abi::__cxa_demangle;
+#endif // GTEST_HAS_CXXABI_H_
+ char* const readable_name = __cxa_demangle(name, nullptr, nullptr, &status);
+ const std::string name_str(status == 0 ? readable_name : name);
+ free(readable_name);
+ return CanonicalizeForStdLibVersioning(name_str);
+#else
+ return name;
+#endif // GTEST_HAS_CXXABI_H_ || __HP_aCC
+}
+#endif // GTEST_HAS_RTTI
+
+// GetTypeName<T>() returns a human-readable name of type T if and only if
+// RTTI is enabled, otherwise it returns a dummy type name.
+// NB: This function is also used in Google Mock, so don't move it inside of
+// the typed-test-only section below.
+template <typename T>
+std::string GetTypeName() {
+#if GTEST_HAS_RTTI
+ return GetTypeName(typeid(T));
+#else
+ return "<type>";
+#endif // GTEST_HAS_RTTI
+}
+
+// A unique type indicating an empty node
+struct None {};
+
+#define GTEST_TEMPLATE_ \
+ template <typename T> \
+ class
+
+// The template "selector" struct TemplateSel<Tmpl> is used to
+// represent Tmpl, which must be a class template with one type
+// parameter, as a type. TemplateSel<Tmpl>::Bind<T>::type is defined
+// as the type Tmpl<T>. This allows us to actually instantiate the
+// template "selected" by TemplateSel<Tmpl>.
+//
+// This trick is necessary for simulating typedef for class templates,
+// which C++ doesn't support directly.
+template <GTEST_TEMPLATE_ Tmpl>
+struct TemplateSel {
+ template <typename T>
+ struct Bind {
+ typedef Tmpl<T> type;
+ };
+};
+
+#define GTEST_BIND_(TmplSel, T) TmplSel::template Bind<T>::type
+
+template <GTEST_TEMPLATE_ Head_, GTEST_TEMPLATE_... Tail_>
+struct Templates {
+ using Head = TemplateSel<Head_>;
+ using Tail = Templates<Tail_...>;
+};
+
+template <GTEST_TEMPLATE_ Head_>
+struct Templates<Head_> {
+ using Head = TemplateSel<Head_>;
+ using Tail = None;
+};
+
+// Tuple-like type lists
+template <typename Head_, typename... Tail_>
+struct Types {
+ using Head = Head_;
+ using Tail = Types<Tail_...>;
+};
+
+template <typename Head_>
+struct Types<Head_> {
+ using Head = Head_;
+ using Tail = None;
+};
+
+// Helper metafunctions to tell apart a single type from types
+// generated by ::testing::Types
+template <typename... Ts>
+struct ProxyTypeList {
+ using type = Types<Ts...>;
+};
+
+template <typename>
+struct is_proxy_type_list : std::false_type {};
+
+template <typename... Ts>
+struct is_proxy_type_list<ProxyTypeList<Ts...>> : std::true_type {};
+
+// Generator which conditionally creates type lists.
+// It recognizes if a requested type list should be created
+// and prevents creating a new type list nested within another one.
+template <typename T>
+struct GenerateTypeList {
+ private:
+ using proxy = typename std::conditional<is_proxy_type_list<T>::value, T,
+ ProxyTypeList<T>>::type;
+
+ public:
+ using type = typename proxy::type;
+};
+
+} // namespace internal
+
+template <typename... Ts>
+using Types = internal::ProxyTypeList<Ts...>;
+
+} // namespace testing
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-all.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-all.cc
new file mode 100644
index 0000000000..2a70ed88c7
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-all.cc
@@ -0,0 +1,49 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Google C++ Testing and Mocking Framework (Google Test)
+//
+// Sometimes it's desirable to build Google Test by compiling a single file.
+// This file serves this purpose.
+
+// This line ensures that gtest.h can be compiled on its own, even
+// when it's fused.
+#include "gtest/gtest.h"
+
+// The following lines pull in the real gtest *.cc files.
+#include "src/gtest-assertion-result.cc"
+#include "src/gtest-death-test.cc"
+#include "src/gtest-filepath.cc"
+#include "src/gtest-matchers.cc"
+#include "src/gtest-port.cc"
+#include "src/gtest-printers.cc"
+#include "src/gtest-test-part.cc"
+#include "src/gtest-typed-test.cc"
+#include "src/gtest.cc"
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-assertion-result.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-assertion-result.cc
new file mode 100644
index 0000000000..f1c0b10dc9
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-assertion-result.cc
@@ -0,0 +1,77 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This file defines the AssertionResult type.
+
+#include "gtest/gtest-assertion-result.h"
+
+#include <string>
+#include <utility>
+
+#include "gtest/gtest-message.h"
+
+namespace testing {
+
+// AssertionResult constructors.
+// Used in EXPECT_TRUE/FALSE(assertion_result).
+AssertionResult::AssertionResult(const AssertionResult& other)
+ : success_(other.success_),
+ message_(other.message_.get() != nullptr
+ ? new ::std::string(*other.message_)
+ : static_cast< ::std::string*>(nullptr)) {}
+
+// Swaps two AssertionResults.
+void AssertionResult::swap(AssertionResult& other) {
+ using std::swap;
+ swap(success_, other.success_);
+ swap(message_, other.message_);
+}
+
+// Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
+AssertionResult AssertionResult::operator!() const {
+ AssertionResult negation(!success_);
+ if (message_.get() != nullptr) negation << *message_;
+ return negation;
+}
+
+// Makes a successful assertion result.
+AssertionResult AssertionSuccess() { return AssertionResult(true); }
+
+// Makes a failed assertion result.
+AssertionResult AssertionFailure() { return AssertionResult(false); }
+
+// Makes a failed assertion result with the given failure message.
+// Deprecated; use AssertionFailure() << message.
+AssertionResult AssertionFailure(const Message& message) {
+ return AssertionFailure() << message;
+}
+
+} // namespace testing
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-death-test.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-death-test.cc
new file mode 100644
index 0000000000..e6abc6278a
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-death-test.cc
@@ -0,0 +1,1620 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+//
+// This file implements death tests.
+
+#include "gtest/gtest-death-test.h"
+
+#include <functional>
+#include <utility>
+
+#include "gtest/internal/custom/gtest.h"
+#include "gtest/internal/gtest-port.h"
+
+#if GTEST_HAS_DEATH_TEST
+
+#if GTEST_OS_MAC
+#include <crt_externs.h>
+#endif // GTEST_OS_MAC
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+
+#if GTEST_OS_LINUX
+#include <signal.h>
+#endif // GTEST_OS_LINUX
+
+#include <stdarg.h>
+
+#if GTEST_OS_WINDOWS
+#include <windows.h>
+#else
+#include <sys/mman.h>
+#include <sys/wait.h>
+#endif // GTEST_OS_WINDOWS
+
+#if GTEST_OS_QNX
+#include <spawn.h>
+#endif // GTEST_OS_QNX
+
+#if GTEST_OS_FUCHSIA
+#include <lib/fdio/fd.h>
+#include <lib/fdio/io.h>
+#include <lib/fdio/spawn.h>
+#include <lib/zx/channel.h>
+#include <lib/zx/port.h>
+#include <lib/zx/process.h>
+#include <lib/zx/socket.h>
+#include <zircon/processargs.h>
+#include <zircon/syscalls.h>
+#include <zircon/syscalls/policy.h>
+#include <zircon/syscalls/port.h>
+#endif // GTEST_OS_FUCHSIA
+
+#endif // GTEST_HAS_DEATH_TEST
+
+#include "gtest/gtest-message.h"
+#include "gtest/internal/gtest-string.h"
+#include "src/gtest-internal-inl.h"
+
+namespace testing {
+
+// Constants.
+
+// The default death test style.
+//
+// This is defined in internal/gtest-port.h as "fast", but can be overridden by
+// a definition in internal/custom/gtest-port.h. The recommended value, which is
+// used internally at Google, is "threadsafe".
+static const char kDefaultDeathTestStyle[] = GTEST_DEFAULT_DEATH_TEST_STYLE;
+
+} // namespace testing
+
+GTEST_DEFINE_string_(
+ death_test_style,
+ testing::internal::StringFromGTestEnv("death_test_style",
+ testing::kDefaultDeathTestStyle),
+ "Indicates how to run a death test in a forked child process: "
+ "\"threadsafe\" (child process re-executes the test binary "
+ "from the beginning, running only the specific death test) or "
+ "\"fast\" (child process runs the death test immediately "
+ "after forking).");
+
+GTEST_DEFINE_bool_(
+ death_test_use_fork,
+ testing::internal::BoolFromGTestEnv("death_test_use_fork", false),
+ "Instructs to use fork()/_exit() instead of clone() in death tests. "
+ "Ignored and always uses fork() on POSIX systems where clone() is not "
+ "implemented. Useful when running under valgrind or similar tools if "
+ "those do not support clone(). Valgrind 3.3.1 will just fail if "
+ "it sees an unsupported combination of clone() flags. "
+ "It is not recommended to use this flag w/o valgrind though it will "
+ "work in 99% of the cases. Once valgrind is fixed, this flag will "
+ "most likely be removed.");
+
+GTEST_DEFINE_string_(
+ internal_run_death_test, "",
+ "Indicates the file, line number, temporal index of "
+ "the single death test to run, and a file descriptor to "
+ "which a success code may be sent, all separated by "
+ "the '|' characters. This flag is specified if and only if the "
+ "current process is a sub-process launched for running a thread-safe "
+ "death test. FOR INTERNAL USE ONLY.");
+
+namespace testing {
+
+#if GTEST_HAS_DEATH_TEST
+
+namespace internal {
+
+// Valid only for fast death tests. Indicates the code is running in the
+// child process of a fast style death test.
+#if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
+static bool g_in_fast_death_test_child = false;
+#endif
+
+// Returns a Boolean value indicating whether the caller is currently
+// executing in the context of the death test child process. Tools such as
+// Valgrind heap checkers may need this to modify their behavior in death
+// tests. IMPORTANT: This is an internal utility. Using it may break the
+// implementation of death tests. User code MUST NOT use it.
+bool InDeathTestChild() {
+#if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
+
+ // On Windows and Fuchsia, death tests are thread-safe regardless of the value
+ // of the death_test_style flag.
+ return !GTEST_FLAG_GET(internal_run_death_test).empty();
+
+#else
+
+ if (GTEST_FLAG_GET(death_test_style) == "threadsafe")
+ return !GTEST_FLAG_GET(internal_run_death_test).empty();
+ else
+ return g_in_fast_death_test_child;
+#endif
+}
+
+} // namespace internal
+
+// ExitedWithCode constructor.
+ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) {}
+
+// ExitedWithCode function-call operator.
+bool ExitedWithCode::operator()(int exit_status) const {
+#if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
+
+ return exit_status == exit_code_;
+
+#else
+
+ return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_;
+
+#endif // GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
+}
+
+#if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
+// KilledBySignal constructor.
+KilledBySignal::KilledBySignal(int signum) : signum_(signum) {}
+
+// KilledBySignal function-call operator.
+bool KilledBySignal::operator()(int exit_status) const {
+#if defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_)
+ {
+ bool result;
+ if (GTEST_KILLED_BY_SIGNAL_OVERRIDE_(signum_, exit_status, &result)) {
+ return result;
+ }
+ }
+#endif // defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_)
+ return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_;
+}
+#endif // !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
+
+namespace internal {
+
+// Utilities needed for death tests.
+
+// Generates a textual description of a given exit code, in the format
+// specified by wait(2).
+static std::string ExitSummary(int exit_code) {
+ Message m;
+
+#if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
+
+ m << "Exited with exit status " << exit_code;
+
+#else
+
+ if (WIFEXITED(exit_code)) {
+ m << "Exited with exit status " << WEXITSTATUS(exit_code);
+ } else if (WIFSIGNALED(exit_code)) {
+ m << "Terminated by signal " << WTERMSIG(exit_code);
+ }
+#ifdef WCOREDUMP
+ if (WCOREDUMP(exit_code)) {
+ m << " (core dumped)";
+ }
+#endif
+#endif // GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
+
+ return m.GetString();
+}
+
+// Returns true if exit_status describes a process that was terminated
+// by a signal, or exited normally with a nonzero exit code.
+bool ExitedUnsuccessfully(int exit_status) {
+ return !ExitedWithCode(0)(exit_status);
+}
+
+#if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
+// Generates a textual failure message when a death test finds more than
+// one thread running, or cannot determine the number of threads, prior
+// to executing the given statement. It is the responsibility of the
+// caller not to pass a thread_count of 1.
+static std::string DeathTestThreadWarning(size_t thread_count) {
+ Message msg;
+ msg << "Death tests use fork(), which is unsafe particularly"
+ << " in a threaded context. For this test, " << GTEST_NAME_ << " ";
+ if (thread_count == 0) {
+ msg << "couldn't detect the number of threads.";
+ } else {
+ msg << "detected " << thread_count << " threads.";
+ }
+ msg << " See "
+ "https://github.com/google/googletest/blob/master/docs/"
+ "advanced.md#death-tests-and-threads"
+ << " for more explanation and suggested solutions, especially if"
+ << " this is the last message you see before your test times out.";
+ return msg.GetString();
+}
+#endif // !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
+
+// Flag characters for reporting a death test that did not die.
+static const char kDeathTestLived = 'L';
+static const char kDeathTestReturned = 'R';
+static const char kDeathTestThrew = 'T';
+static const char kDeathTestInternalError = 'I';
+
+#if GTEST_OS_FUCHSIA
+
+// File descriptor used for the pipe in the child process.
+static const int kFuchsiaReadPipeFd = 3;
+
+#endif
+
+// An enumeration describing all of the possible ways that a death test can
+// conclude. DIED means that the process died while executing the test
+// code; LIVED means that process lived beyond the end of the test code;
+// RETURNED means that the test statement attempted to execute a return
+// statement, which is not allowed; THREW means that the test statement
+// returned control by throwing an exception. IN_PROGRESS means the test
+// has not yet concluded.
+enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW };
+
+// Routine for aborting the program which is safe to call from an
+// exec-style death test child process, in which case the error
+// message is propagated back to the parent process. Otherwise, the
+// message is simply printed to stderr. In either case, the program
+// then exits with status 1.
+static void DeathTestAbort(const std::string& message) {
+ // On a POSIX system, this function may be called from a threadsafe-style
+ // death test child process, which operates on a very small stack. Use
+ // the heap for any additional non-minuscule memory requirements.
+ const InternalRunDeathTestFlag* const flag =
+ GetUnitTestImpl()->internal_run_death_test_flag();
+ if (flag != nullptr) {
+ FILE* parent = posix::FDOpen(flag->write_fd(), "w");
+ fputc(kDeathTestInternalError, parent);
+ fprintf(parent, "%s", message.c_str());
+ fflush(parent);
+ _exit(1);
+ } else {
+ fprintf(stderr, "%s", message.c_str());
+ fflush(stderr);
+ posix::Abort();
+ }
+}
+
+// A replacement for CHECK that calls DeathTestAbort if the assertion
+// fails.
+#define GTEST_DEATH_TEST_CHECK_(expression) \
+ do { \
+ if (!::testing::internal::IsTrue(expression)) { \
+ DeathTestAbort(::std::string("CHECK failed: File ") + __FILE__ + \
+ ", line " + \
+ ::testing::internal::StreamableToString(__LINE__) + \
+ ": " + #expression); \
+ } \
+ } while (::testing::internal::AlwaysFalse())
+
+// This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for
+// evaluating any system call that fulfills two conditions: it must return
+// -1 on failure, and set errno to EINTR when it is interrupted and
+// should be tried again. The macro expands to a loop that repeatedly
+// evaluates the expression as long as it evaluates to -1 and sets
+// errno to EINTR. If the expression evaluates to -1 but errno is
+// something other than EINTR, DeathTestAbort is called.
+#define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \
+ do { \
+ int gtest_retval; \
+ do { \
+ gtest_retval = (expression); \
+ } while (gtest_retval == -1 && errno == EINTR); \
+ if (gtest_retval == -1) { \
+ DeathTestAbort(::std::string("CHECK failed: File ") + __FILE__ + \
+ ", line " + \
+ ::testing::internal::StreamableToString(__LINE__) + \
+ ": " + #expression + " != -1"); \
+ } \
+ } while (::testing::internal::AlwaysFalse())
+
+// Returns the message describing the last system error in errno.
+std::string GetLastErrnoDescription() {
+ return errno == 0 ? "" : posix::StrError(errno);
+}
+
+// This is called from a death test parent process to read a failure
+// message from the death test child process and log it with the FATAL
+// severity. On Windows, the message is read from a pipe handle. On other
+// platforms, it is read from a file descriptor.
+static void FailFromInternalError(int fd) {
+ Message error;
+ char buffer[256];
+ int num_read;
+
+ do {
+ while ((num_read = posix::Read(fd, buffer, 255)) > 0) {
+ buffer[num_read] = '\0';
+ error << buffer;
+ }
+ } while (num_read == -1 && errno == EINTR);
+
+ if (num_read == 0) {
+ GTEST_LOG_(FATAL) << error.GetString();
+ } else {
+ const int last_error = errno;
+ GTEST_LOG_(FATAL) << "Error while reading death test internal: "
+ << GetLastErrnoDescription() << " [" << last_error << "]";
+ }
+}
+
+// Death test constructor. Increments the running death test count
+// for the current test.
+DeathTest::DeathTest() {
+ TestInfo* const info = GetUnitTestImpl()->current_test_info();
+ if (info == nullptr) {
+ DeathTestAbort(
+ "Cannot run a death test outside of a TEST or "
+ "TEST_F construct");
+ }
+}
+
+// Creates and returns a death test by dispatching to the current
+// death test factory.
+bool DeathTest::Create(const char* statement,
+ Matcher<const std::string&> matcher, const char* file,
+ int line, DeathTest** test) {
+ return GetUnitTestImpl()->death_test_factory()->Create(
+ statement, std::move(matcher), file, line, test);
+}
+
+const char* DeathTest::LastMessage() {
+ return last_death_test_message_.c_str();
+}
+
+void DeathTest::set_last_death_test_message(const std::string& message) {
+ last_death_test_message_ = message;
+}
+
+std::string DeathTest::last_death_test_message_;
+
+// Provides cross platform implementation for some death functionality.
+class DeathTestImpl : public DeathTest {
+ protected:
+ DeathTestImpl(const char* a_statement, Matcher<const std::string&> matcher)
+ : statement_(a_statement),
+ matcher_(std::move(matcher)),
+ spawned_(false),
+ status_(-1),
+ outcome_(IN_PROGRESS),
+ read_fd_(-1),
+ write_fd_(-1) {}
+
+ // read_fd_ is expected to be closed and cleared by a derived class.
+ ~DeathTestImpl() override { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }
+
+ void Abort(AbortReason reason) override;
+ bool Passed(bool status_ok) override;
+
+ const char* statement() const { return statement_; }
+ bool spawned() const { return spawned_; }
+ void set_spawned(bool is_spawned) { spawned_ = is_spawned; }
+ int status() const { return status_; }
+ void set_status(int a_status) { status_ = a_status; }
+ DeathTestOutcome outcome() const { return outcome_; }
+ void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; }
+ int read_fd() const { return read_fd_; }
+ void set_read_fd(int fd) { read_fd_ = fd; }
+ int write_fd() const { return write_fd_; }
+ void set_write_fd(int fd) { write_fd_ = fd; }
+
+ // Called in the parent process only. Reads the result code of the death
+ // test child process via a pipe, interprets it to set the outcome_
+ // member, and closes read_fd_. Outputs diagnostics and terminates in
+ // case of unexpected codes.
+ void ReadAndInterpretStatusByte();
+
+ // Returns stderr output from the child process.
+ virtual std::string GetErrorLogs();
+
+ private:
+ // The textual content of the code this object is testing. This class
+ // doesn't own this string and should not attempt to delete it.
+ const char* const statement_;
+ // A matcher that's expected to match the stderr output by the child process.
+ Matcher<const std::string&> matcher_;
+ // True if the death test child process has been successfully spawned.
+ bool spawned_;
+ // The exit status of the child process.
+ int status_;
+ // How the death test concluded.
+ DeathTestOutcome outcome_;
+ // Descriptor to the read end of the pipe to the child process. It is
+ // always -1 in the child process. The child keeps its write end of the
+ // pipe in write_fd_.
+ int read_fd_;
+ // Descriptor to the child's write end of the pipe to the parent process.
+ // It is always -1 in the parent process. The parent keeps its end of the
+ // pipe in read_fd_.
+ int write_fd_;
+};
+
+// Called in the parent process only. Reads the result code of the death
+// test child process via a pipe, interprets it to set the outcome_
+// member, and closes read_fd_. Outputs diagnostics and terminates in
+// case of unexpected codes.
+void DeathTestImpl::ReadAndInterpretStatusByte() {
+ char flag;
+ int bytes_read;
+
+ // The read() here blocks until data is available (signifying the
+ // failure of the death test) or until the pipe is closed (signifying
+ // its success), so it's okay to call this in the parent before
+ // the child process has exited.
+ do {
+ bytes_read = posix::Read(read_fd(), &flag, 1);
+ } while (bytes_read == -1 && errno == EINTR);
+
+ if (bytes_read == 0) {
+ set_outcome(DIED);
+ } else if (bytes_read == 1) {
+ switch (flag) {
+ case kDeathTestReturned:
+ set_outcome(RETURNED);
+ break;
+ case kDeathTestThrew:
+ set_outcome(THREW);
+ break;
+ case kDeathTestLived:
+ set_outcome(LIVED);
+ break;
+ case kDeathTestInternalError:
+ FailFromInternalError(read_fd()); // Does not return.
+ break;
+ default:
+ GTEST_LOG_(FATAL) << "Death test child process reported "
+ << "unexpected status byte ("
+ << static_cast<unsigned int>(flag) << ")";
+ }
+ } else {
+ GTEST_LOG_(FATAL) << "Read from death test child process failed: "
+ << GetLastErrnoDescription();
+ }
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd()));
+ set_read_fd(-1);
+}
+
+std::string DeathTestImpl::GetErrorLogs() { return GetCapturedStderr(); }
+
+// Signals that the death test code which should have exited, didn't.
+// Should be called only in a death test child process.
+// Writes a status byte to the child's status file descriptor, then
+// calls _exit(1).
+void DeathTestImpl::Abort(AbortReason reason) {
+ // The parent process considers the death test to be a failure if
+ // it finds any data in our pipe. So, here we write a single flag byte
+ // to the pipe, then exit.
+ const char status_ch = reason == TEST_DID_NOT_DIE ? kDeathTestLived
+ : reason == TEST_THREW_EXCEPTION ? kDeathTestThrew
+ : kDeathTestReturned;
+
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1));
+ // We are leaking the descriptor here because on some platforms (i.e.,
+ // when built as Windows DLL), destructors of global objects will still
+ // run after calling _exit(). On such systems, write_fd_ will be
+ // indirectly closed from the destructor of UnitTestImpl, causing double
+ // close if it is also closed here. On debug configurations, double close
+ // may assert. As there are no in-process buffers to flush here, we are
+ // relying on the OS to close the descriptor after the process terminates
+ // when the destructors are not run.
+ _exit(1); // Exits w/o any normal exit hooks (we were supposed to crash)
+}
+
+// Returns an indented copy of stderr output for a death test.
+// This makes distinguishing death test output lines from regular log lines
+// much easier.
+static ::std::string FormatDeathTestOutput(const ::std::string& output) {
+ ::std::string ret;
+ for (size_t at = 0;;) {
+ const size_t line_end = output.find('\n', at);
+ ret += "[ DEATH ] ";
+ if (line_end == ::std::string::npos) {
+ ret += output.substr(at);
+ break;
+ }
+ ret += output.substr(at, line_end + 1 - at);
+ at = line_end + 1;
+ }
+ return ret;
+}
+
+// Assesses the success or failure of a death test, using both private
+// members which have previously been set, and one argument:
+//
+// Private data members:
+// outcome: An enumeration describing how the death test
+// concluded: DIED, LIVED, THREW, or RETURNED. The death test
+// fails in the latter three cases.
+// status: The exit status of the child process. On *nix, it is in the
+// in the format specified by wait(2). On Windows, this is the
+// value supplied to the ExitProcess() API or a numeric code
+// of the exception that terminated the program.
+// matcher_: A matcher that's expected to match the stderr output by the child
+// process.
+//
+// Argument:
+// status_ok: true if exit_status is acceptable in the context of
+// this particular death test, which fails if it is false
+//
+// Returns true if and only if all of the above conditions are met. Otherwise,
+// the first failing condition, in the order given above, is the one that is
+// reported. Also sets the last death test message string.
+bool DeathTestImpl::Passed(bool status_ok) {
+ if (!spawned()) return false;
+
+ const std::string error_message = GetErrorLogs();
+
+ bool success = false;
+ Message buffer;
+
+ buffer << "Death test: " << statement() << "\n";
+ switch (outcome()) {
+ case LIVED:
+ buffer << " Result: failed to die.\n"
+ << " Error msg:\n"
+ << FormatDeathTestOutput(error_message);
+ break;
+ case THREW:
+ buffer << " Result: threw an exception.\n"
+ << " Error msg:\n"
+ << FormatDeathTestOutput(error_message);
+ break;
+ case RETURNED:
+ buffer << " Result: illegal return in test statement.\n"
+ << " Error msg:\n"
+ << FormatDeathTestOutput(error_message);
+ break;
+ case DIED:
+ if (status_ok) {
+ if (matcher_.Matches(error_message)) {
+ success = true;
+ } else {
+ std::ostringstream stream;
+ matcher_.DescribeTo(&stream);
+ buffer << " Result: died but not with expected error.\n"
+ << " Expected: " << stream.str() << "\n"
+ << "Actual msg:\n"
+ << FormatDeathTestOutput(error_message);
+ }
+ } else {
+ buffer << " Result: died but not with expected exit code:\n"
+ << " " << ExitSummary(status()) << "\n"
+ << "Actual msg:\n"
+ << FormatDeathTestOutput(error_message);
+ }
+ break;
+ case IN_PROGRESS:
+ default:
+ GTEST_LOG_(FATAL)
+ << "DeathTest::Passed somehow called before conclusion of test";
+ }
+
+ DeathTest::set_last_death_test_message(buffer.GetString());
+ return success;
+}
+
+#if GTEST_OS_WINDOWS
+// WindowsDeathTest implements death tests on Windows. Due to the
+// specifics of starting new processes on Windows, death tests there are
+// always threadsafe, and Google Test considers the
+// --gtest_death_test_style=fast setting to be equivalent to
+// --gtest_death_test_style=threadsafe there.
+//
+// A few implementation notes: Like the Linux version, the Windows
+// implementation uses pipes for child-to-parent communication. But due to
+// the specifics of pipes on Windows, some extra steps are required:
+//
+// 1. The parent creates a communication pipe and stores handles to both
+// ends of it.
+// 2. The parent starts the child and provides it with the information
+// necessary to acquire the handle to the write end of the pipe.
+// 3. The child acquires the write end of the pipe and signals the parent
+// using a Windows event.
+// 4. Now the parent can release the write end of the pipe on its side. If
+// this is done before step 3, the object's reference count goes down to
+// 0 and it is destroyed, preventing the child from acquiring it. The
+// parent now has to release it, or read operations on the read end of
+// the pipe will not return when the child terminates.
+// 5. The parent reads child's output through the pipe (outcome code and
+// any possible error messages) from the pipe, and its stderr and then
+// determines whether to fail the test.
+//
+// Note: to distinguish Win32 API calls from the local method and function
+// calls, the former are explicitly resolved in the global namespace.
+//
+class WindowsDeathTest : public DeathTestImpl {
+ public:
+ WindowsDeathTest(const char* a_statement, Matcher<const std::string&> matcher,
+ const char* file, int line)
+ : DeathTestImpl(a_statement, std::move(matcher)),
+ file_(file),
+ line_(line) {}
+
+ // All of these virtual functions are inherited from DeathTest.
+ virtual int Wait();
+ virtual TestRole AssumeRole();
+
+ private:
+ // The name of the file in which the death test is located.
+ const char* const file_;
+ // The line number on which the death test is located.
+ const int line_;
+ // Handle to the write end of the pipe to the child process.
+ AutoHandle write_handle_;
+ // Child process handle.
+ AutoHandle child_handle_;
+ // Event the child process uses to signal the parent that it has
+ // acquired the handle to the write end of the pipe. After seeing this
+ // event the parent can release its own handles to make sure its
+ // ReadFile() calls return when the child terminates.
+ AutoHandle event_handle_;
+};
+
+// Waits for the child in a death test to exit, returning its exit
+// status, or 0 if no child process exists. As a side effect, sets the
+// outcome data member.
+int WindowsDeathTest::Wait() {
+ if (!spawned()) return 0;
+
+ // Wait until the child either signals that it has acquired the write end
+ // of the pipe or it dies.
+ const HANDLE wait_handles[2] = {child_handle_.Get(), event_handle_.Get()};
+ switch (::WaitForMultipleObjects(2, wait_handles,
+ FALSE, // Waits for any of the handles.
+ INFINITE)) {
+ case WAIT_OBJECT_0:
+ case WAIT_OBJECT_0 + 1:
+ break;
+ default:
+ GTEST_DEATH_TEST_CHECK_(false); // Should not get here.
+ }
+
+ // The child has acquired the write end of the pipe or exited.
+ // We release the handle on our side and continue.
+ write_handle_.Reset();
+ event_handle_.Reset();
+
+ ReadAndInterpretStatusByte();
+
+ // Waits for the child process to exit if it haven't already. This
+ // returns immediately if the child has already exited, regardless of
+ // whether previous calls to WaitForMultipleObjects synchronized on this
+ // handle or not.
+ GTEST_DEATH_TEST_CHECK_(WAIT_OBJECT_0 ==
+ ::WaitForSingleObject(child_handle_.Get(), INFINITE));
+ DWORD status_code;
+ GTEST_DEATH_TEST_CHECK_(
+ ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE);
+ child_handle_.Reset();
+ set_status(static_cast<int>(status_code));
+ return status();
+}
+
+// The AssumeRole process for a Windows death test. It creates a child
+// process with the same executable as the current process to run the
+// death test. The child process is given the --gtest_filter and
+// --gtest_internal_run_death_test flags such that it knows to run the
+// current death test only.
+DeathTest::TestRole WindowsDeathTest::AssumeRole() {
+ const UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
+ impl->internal_run_death_test_flag();
+ const TestInfo* const info = impl->current_test_info();
+ const int death_test_index = info->result()->death_test_count();
+
+ if (flag != nullptr) {
+ // ParseInternalRunDeathTestFlag() has performed all the necessary
+ // processing.
+ set_write_fd(flag->write_fd());
+ return EXECUTE_TEST;
+ }
+
+ // WindowsDeathTest uses an anonymous pipe to communicate results of
+ // a death test.
+ SECURITY_ATTRIBUTES handles_are_inheritable = {sizeof(SECURITY_ATTRIBUTES),
+ nullptr, TRUE};
+ HANDLE read_handle, write_handle;
+ GTEST_DEATH_TEST_CHECK_(::CreatePipe(&read_handle, &write_handle,
+ &handles_are_inheritable,
+ 0) // Default buffer size.
+ != FALSE);
+ set_read_fd(
+ ::_open_osfhandle(reinterpret_cast<intptr_t>(read_handle), O_RDONLY));
+ write_handle_.Reset(write_handle);
+ event_handle_.Reset(::CreateEvent(
+ &handles_are_inheritable,
+ TRUE, // The event will automatically reset to non-signaled state.
+ FALSE, // The initial state is non-signalled.
+ nullptr)); // The even is unnamed.
+ GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != nullptr);
+ const std::string filter_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
+ "filter=" + info->test_suite_name() + "." +
+ info->name();
+ const std::string internal_flag =
+ std::string("--") + GTEST_FLAG_PREFIX_ +
+ "internal_run_death_test=" + file_ + "|" + StreamableToString(line_) +
+ "|" + StreamableToString(death_test_index) + "|" +
+ StreamableToString(static_cast<unsigned int>(::GetCurrentProcessId())) +
+ // size_t has the same width as pointers on both 32-bit and 64-bit
+ // Windows platforms.
+ // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx.
+ "|" + StreamableToString(reinterpret_cast<size_t>(write_handle)) + "|" +
+ StreamableToString(reinterpret_cast<size_t>(event_handle_.Get()));
+
+ char executable_path[_MAX_PATH + 1]; // NOLINT
+ GTEST_DEATH_TEST_CHECK_(_MAX_PATH + 1 != ::GetModuleFileNameA(nullptr,
+ executable_path,
+ _MAX_PATH));
+
+ std::string command_line = std::string(::GetCommandLineA()) + " " +
+ filter_flag + " \"" + internal_flag + "\"";
+
+ DeathTest::set_last_death_test_message("");
+
+ CaptureStderr();
+ // Flush the log buffers since the log streams are shared with the child.
+ FlushInfoLog();
+
+ // The child process will share the standard handles with the parent.
+ STARTUPINFOA startup_info;
+ memset(&startup_info, 0, sizeof(STARTUPINFO));
+ startup_info.dwFlags = STARTF_USESTDHANDLES;
+ startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE);
+ startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE);
+ startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE);
+
+ PROCESS_INFORMATION process_info;
+ GTEST_DEATH_TEST_CHECK_(
+ ::CreateProcessA(
+ executable_path, const_cast<char*>(command_line.c_str()),
+ nullptr, // Returned process handle is not inheritable.
+ nullptr, // Returned thread handle is not inheritable.
+ TRUE, // Child inherits all inheritable handles (for write_handle_).
+ 0x0, // Default creation flags.
+ nullptr, // Inherit the parent's environment.
+ UnitTest::GetInstance()->original_working_dir(), &startup_info,
+ &process_info) != FALSE);
+ child_handle_.Reset(process_info.hProcess);
+ ::CloseHandle(process_info.hThread);
+ set_spawned(true);
+ return OVERSEE_TEST;
+}
+
+#elif GTEST_OS_FUCHSIA
+
+class FuchsiaDeathTest : public DeathTestImpl {
+ public:
+ FuchsiaDeathTest(const char* a_statement, Matcher<const std::string&> matcher,
+ const char* file, int line)
+ : DeathTestImpl(a_statement, std::move(matcher)),
+ file_(file),
+ line_(line) {}
+
+ // All of these virtual functions are inherited from DeathTest.
+ int Wait() override;
+ TestRole AssumeRole() override;
+ std::string GetErrorLogs() override;
+
+ private:
+ // The name of the file in which the death test is located.
+ const char* const file_;
+ // The line number on which the death test is located.
+ const int line_;
+ // The stderr data captured by the child process.
+ std::string captured_stderr_;
+
+ zx::process child_process_;
+ zx::channel exception_channel_;
+ zx::socket stderr_socket_;
+};
+
+// Utility class for accumulating command-line arguments.
+class Arguments {
+ public:
+ Arguments() { args_.push_back(nullptr); }
+
+ ~Arguments() {
+ for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
+ ++i) {
+ free(*i);
+ }
+ }
+ void AddArgument(const char* argument) {
+ args_.insert(args_.end() - 1, posix::StrDup(argument));
+ }
+
+ template <typename Str>
+ void AddArguments(const ::std::vector<Str>& arguments) {
+ for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
+ i != arguments.end(); ++i) {
+ args_.insert(args_.end() - 1, posix::StrDup(i->c_str()));
+ }
+ }
+ char* const* Argv() { return &args_[0]; }
+
+ int size() { return static_cast<int>(args_.size()) - 1; }
+
+ private:
+ std::vector<char*> args_;
+};
+
+// Waits for the child in a death test to exit, returning its exit
+// status, or 0 if no child process exists. As a side effect, sets the
+// outcome data member.
+int FuchsiaDeathTest::Wait() {
+ const int kProcessKey = 0;
+ const int kSocketKey = 1;
+ const int kExceptionKey = 2;
+
+ if (!spawned()) return 0;
+
+ // Create a port to wait for socket/task/exception events.
+ zx_status_t status_zx;
+ zx::port port;
+ status_zx = zx::port::create(0, &port);
+ GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+ // Register to wait for the child process to terminate.
+ status_zx =
+ child_process_.wait_async(port, kProcessKey, ZX_PROCESS_TERMINATED, 0);
+ GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+ // Register to wait for the socket to be readable or closed.
+ status_zx = stderr_socket_.wait_async(
+ port, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED, 0);
+ GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+ // Register to wait for an exception.
+ status_zx = exception_channel_.wait_async(port, kExceptionKey,
+ ZX_CHANNEL_READABLE, 0);
+ GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+ bool process_terminated = false;
+ bool socket_closed = false;
+ do {
+ zx_port_packet_t packet = {};
+ status_zx = port.wait(zx::time::infinite(), &packet);
+ GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+ if (packet.key == kExceptionKey) {
+ // Process encountered an exception. Kill it directly rather than
+ // letting other handlers process the event. We will get a kProcessKey
+ // event when the process actually terminates.
+ status_zx = child_process_.kill();
+ GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+ } else if (packet.key == kProcessKey) {
+ // Process terminated.
+ GTEST_DEATH_TEST_CHECK_(ZX_PKT_IS_SIGNAL_ONE(packet.type));
+ GTEST_DEATH_TEST_CHECK_(packet.signal.observed & ZX_PROCESS_TERMINATED);
+ process_terminated = true;
+ } else if (packet.key == kSocketKey) {
+ GTEST_DEATH_TEST_CHECK_(ZX_PKT_IS_SIGNAL_ONE(packet.type));
+ if (packet.signal.observed & ZX_SOCKET_READABLE) {
+ // Read data from the socket.
+ constexpr size_t kBufferSize = 1024;
+ do {
+ size_t old_length = captured_stderr_.length();
+ size_t bytes_read = 0;
+ captured_stderr_.resize(old_length + kBufferSize);
+ status_zx =
+ stderr_socket_.read(0, &captured_stderr_.front() + old_length,
+ kBufferSize, &bytes_read);
+ captured_stderr_.resize(old_length + bytes_read);
+ } while (status_zx == ZX_OK);
+ if (status_zx == ZX_ERR_PEER_CLOSED) {
+ socket_closed = true;
+ } else {
+ GTEST_DEATH_TEST_CHECK_(status_zx == ZX_ERR_SHOULD_WAIT);
+ status_zx = stderr_socket_.wait_async(
+ port, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED, 0);
+ GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+ }
+ } else {
+ GTEST_DEATH_TEST_CHECK_(packet.signal.observed & ZX_SOCKET_PEER_CLOSED);
+ socket_closed = true;
+ }
+ }
+ } while (!process_terminated && !socket_closed);
+
+ ReadAndInterpretStatusByte();
+
+ zx_info_process_t buffer;
+ status_zx = child_process_.get_info(ZX_INFO_PROCESS, &buffer, sizeof(buffer),
+ nullptr, nullptr);
+ GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+ GTEST_DEATH_TEST_CHECK_(buffer.flags & ZX_INFO_PROCESS_FLAG_EXITED);
+ set_status(static_cast<int>(buffer.return_code));
+ return status();
+}
+
+// The AssumeRole process for a Fuchsia death test. It creates a child
+// process with the same executable as the current process to run the
+// death test. The child process is given the --gtest_filter and
+// --gtest_internal_run_death_test flags such that it knows to run the
+// current death test only.
+DeathTest::TestRole FuchsiaDeathTest::AssumeRole() {
+ const UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
+ impl->internal_run_death_test_flag();
+ const TestInfo* const info = impl->current_test_info();
+ const int death_test_index = info->result()->death_test_count();
+
+ if (flag != nullptr) {
+ // ParseInternalRunDeathTestFlag() has performed all the necessary
+ // processing.
+ set_write_fd(kFuchsiaReadPipeFd);
+ return EXECUTE_TEST;
+ }
+
+ // Flush the log buffers since the log streams are shared with the child.
+ FlushInfoLog();
+
+ // Build the child process command line.
+ const std::string filter_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
+ "filter=" + info->test_suite_name() + "." +
+ info->name();
+ const std::string internal_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
+ kInternalRunDeathTestFlag + "=" + file_ +
+ "|" + StreamableToString(line_) + "|" +
+ StreamableToString(death_test_index);
+ Arguments args;
+ args.AddArguments(GetInjectableArgvs());
+ args.AddArgument(filter_flag.c_str());
+ args.AddArgument(internal_flag.c_str());
+
+ // Build the pipe for communication with the child.
+ zx_status_t status;
+ zx_handle_t child_pipe_handle;
+ int child_pipe_fd;
+ status = fdio_pipe_half(&child_pipe_fd, &child_pipe_handle);
+ GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
+ set_read_fd(child_pipe_fd);
+
+ // Set the pipe handle for the child.
+ fdio_spawn_action_t spawn_actions[2] = {};
+ fdio_spawn_action_t* add_handle_action = &spawn_actions[0];
+ add_handle_action->action = FDIO_SPAWN_ACTION_ADD_HANDLE;
+ add_handle_action->h.id = PA_HND(PA_FD, kFuchsiaReadPipeFd);
+ add_handle_action->h.handle = child_pipe_handle;
+
+ // Create a socket pair will be used to receive the child process' stderr.
+ zx::socket stderr_producer_socket;
+ status = zx::socket::create(0, &stderr_producer_socket, &stderr_socket_);
+ GTEST_DEATH_TEST_CHECK_(status >= 0);
+ int stderr_producer_fd = -1;
+ status =
+ fdio_fd_create(stderr_producer_socket.release(), &stderr_producer_fd);
+ GTEST_DEATH_TEST_CHECK_(status >= 0);
+
+ // Make the stderr socket nonblocking.
+ GTEST_DEATH_TEST_CHECK_(fcntl(stderr_producer_fd, F_SETFL, 0) == 0);
+
+ fdio_spawn_action_t* add_stderr_action = &spawn_actions[1];
+ add_stderr_action->action = FDIO_SPAWN_ACTION_CLONE_FD;
+ add_stderr_action->fd.local_fd = stderr_producer_fd;
+ add_stderr_action->fd.target_fd = STDERR_FILENO;
+
+ // Create a child job.
+ zx_handle_t child_job = ZX_HANDLE_INVALID;
+ status = zx_job_create(zx_job_default(), 0, &child_job);
+ GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
+ zx_policy_basic_t policy;
+ policy.condition = ZX_POL_NEW_ANY;
+ policy.policy = ZX_POL_ACTION_ALLOW;
+ status = zx_job_set_policy(child_job, ZX_JOB_POL_RELATIVE, ZX_JOB_POL_BASIC,
+ &policy, 1);
+ GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
+
+ // Create an exception channel attached to the |child_job|, to allow
+ // us to suppress the system default exception handler from firing.
+ status = zx_task_create_exception_channel(
+ child_job, 0, exception_channel_.reset_and_get_address());
+ GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
+
+ // Spawn the child process.
+ status = fdio_spawn_etc(child_job, FDIO_SPAWN_CLONE_ALL, args.Argv()[0],
+ args.Argv(), nullptr, 2, spawn_actions,
+ child_process_.reset_and_get_address(), nullptr);
+ GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
+
+ set_spawned(true);
+ return OVERSEE_TEST;
+}
+
+std::string FuchsiaDeathTest::GetErrorLogs() { return captured_stderr_; }
+
+#else // We are neither on Windows, nor on Fuchsia.
+
+// ForkingDeathTest provides implementations for most of the abstract
+// methods of the DeathTest interface. Only the AssumeRole method is
+// left undefined.
+class ForkingDeathTest : public DeathTestImpl {
+ public:
+ ForkingDeathTest(const char* statement, Matcher<const std::string&> matcher);
+
+ // All of these virtual functions are inherited from DeathTest.
+ int Wait() override;
+
+ protected:
+ void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; }
+
+ private:
+ // PID of child process during death test; 0 in the child process itself.
+ pid_t child_pid_;
+};
+
+// Constructs a ForkingDeathTest.
+ForkingDeathTest::ForkingDeathTest(const char* a_statement,
+ Matcher<const std::string&> matcher)
+ : DeathTestImpl(a_statement, std::move(matcher)), child_pid_(-1) {}
+
+// Waits for the child in a death test to exit, returning its exit
+// status, or 0 if no child process exists. As a side effect, sets the
+// outcome data member.
+int ForkingDeathTest::Wait() {
+ if (!spawned()) return 0;
+
+ ReadAndInterpretStatusByte();
+
+ int status_value;
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0));
+ set_status(status_value);
+ return status_value;
+}
+
+// A concrete death test class that forks, then immediately runs the test
+// in the child process.
+class NoExecDeathTest : public ForkingDeathTest {
+ public:
+ NoExecDeathTest(const char* a_statement, Matcher<const std::string&> matcher)
+ : ForkingDeathTest(a_statement, std::move(matcher)) {}
+ TestRole AssumeRole() override;
+};
+
+// The AssumeRole process for a fork-and-run death test. It implements a
+// straightforward fork, with a simple pipe to transmit the status byte.
+DeathTest::TestRole NoExecDeathTest::AssumeRole() {
+ const size_t thread_count = GetThreadCount();
+ if (thread_count != 1) {
+ GTEST_LOG_(WARNING) << DeathTestThreadWarning(thread_count);
+ }
+
+ int pipe_fd[2];
+ GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);
+
+ DeathTest::set_last_death_test_message("");
+ CaptureStderr();
+ // When we fork the process below, the log file buffers are copied, but the
+ // file descriptors are shared. We flush all log files here so that closing
+ // the file descriptors in the child process doesn't throw off the
+ // synchronization between descriptors and buffers in the parent process.
+ // This is as close to the fork as possible to avoid a race condition in case
+ // there are multiple threads running before the death test, and another
+ // thread writes to the log file.
+ FlushInfoLog();
+
+ const pid_t child_pid = fork();
+ GTEST_DEATH_TEST_CHECK_(child_pid != -1);
+ set_child_pid(child_pid);
+ if (child_pid == 0) {
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0]));
+ set_write_fd(pipe_fd[1]);
+ // Redirects all logging to stderr in the child process to prevent
+ // concurrent writes to the log files. We capture stderr in the parent
+ // process and append the child process' output to a log.
+ LogToStderr();
+ // Event forwarding to the listeners of event listener API mush be shut
+ // down in death test subprocesses.
+ GetUnitTestImpl()->listeners()->SuppressEventForwarding();
+ g_in_fast_death_test_child = true;
+ return EXECUTE_TEST;
+ } else {
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
+ set_read_fd(pipe_fd[0]);
+ set_spawned(true);
+ return OVERSEE_TEST;
+ }
+}
+
+// A concrete death test class that forks and re-executes the main
+// program from the beginning, with command-line flags set that cause
+// only this specific death test to be run.
+class ExecDeathTest : public ForkingDeathTest {
+ public:
+ ExecDeathTest(const char* a_statement, Matcher<const std::string&> matcher,
+ const char* file, int line)
+ : ForkingDeathTest(a_statement, std::move(matcher)),
+ file_(file),
+ line_(line) {}
+ TestRole AssumeRole() override;
+
+ private:
+ static ::std::vector<std::string> GetArgvsForDeathTestChildProcess() {
+ ::std::vector<std::string> args = GetInjectableArgvs();
+#if defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_)
+ ::std::vector<std::string> extra_args =
+ GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_();
+ args.insert(args.end(), extra_args.begin(), extra_args.end());
+#endif // defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_)
+ return args;
+ }
+ // The name of the file in which the death test is located.
+ const char* const file_;
+ // The line number on which the death test is located.
+ const int line_;
+};
+
+// Utility class for accumulating command-line arguments.
+class Arguments {
+ public:
+ Arguments() { args_.push_back(nullptr); }
+
+ ~Arguments() {
+ for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
+ ++i) {
+ free(*i);
+ }
+ }
+ void AddArgument(const char* argument) {
+ args_.insert(args_.end() - 1, posix::StrDup(argument));
+ }
+
+ template <typename Str>
+ void AddArguments(const ::std::vector<Str>& arguments) {
+ for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
+ i != arguments.end(); ++i) {
+ args_.insert(args_.end() - 1, posix::StrDup(i->c_str()));
+ }
+ }
+ char* const* Argv() { return &args_[0]; }
+
+ private:
+ std::vector<char*> args_;
+};
+
+// A struct that encompasses the arguments to the child process of a
+// threadsafe-style death test process.
+struct ExecDeathTestArgs {
+ char* const* argv; // Command-line arguments for the child's call to exec
+ int close_fd; // File descriptor to close; the read end of a pipe
+};
+
+#if GTEST_OS_QNX
+extern "C" char** environ;
+#else // GTEST_OS_QNX
+// The main function for a threadsafe-style death test child process.
+// This function is called in a clone()-ed process and thus must avoid
+// any potentially unsafe operations like malloc or libc functions.
+static int ExecDeathTestChildMain(void* child_arg) {
+ ExecDeathTestArgs* const args = static_cast<ExecDeathTestArgs*>(child_arg);
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd));
+
+ // We need to execute the test program in the same environment where
+ // it was originally invoked. Therefore we change to the original
+ // working directory first.
+ const char* const original_dir =
+ UnitTest::GetInstance()->original_working_dir();
+ // We can safely call chdir() as it's a direct system call.
+ if (chdir(original_dir) != 0) {
+ DeathTestAbort(std::string("chdir(\"") + original_dir +
+ "\") failed: " + GetLastErrnoDescription());
+ return EXIT_FAILURE;
+ }
+
+ // We can safely call execv() as it's almost a direct system call. We
+ // cannot use execvp() as it's a libc function and thus potentially
+ // unsafe. Since execv() doesn't search the PATH, the user must
+ // invoke the test program via a valid path that contains at least
+ // one path separator.
+ execv(args->argv[0], args->argv);
+ DeathTestAbort(std::string("execv(") + args->argv[0] + ", ...) in " +
+ original_dir + " failed: " + GetLastErrnoDescription());
+ return EXIT_FAILURE;
+}
+#endif // GTEST_OS_QNX
+
+#if GTEST_HAS_CLONE
+// Two utility routines that together determine the direction the stack
+// grows.
+// This could be accomplished more elegantly by a single recursive
+// function, but we want to guard against the unlikely possibility of
+// a smart compiler optimizing the recursion away.
+//
+// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining
+// StackLowerThanAddress into StackGrowsDown, which then doesn't give
+// correct answer.
+static void StackLowerThanAddress(const void* ptr,
+ bool* result) GTEST_NO_INLINE_;
+// Make sure sanitizers do not tamper with the stack here.
+// Ideally, we want to use `__builtin_frame_address` instead of a local variable
+// address with sanitizer disabled, but it does not work when the
+// compiler optimizes the stack frame out, which happens on PowerPC targets.
+// HWAddressSanitizer add a random tag to the MSB of the local variable address,
+// making comparison result unpredictable.
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+static void StackLowerThanAddress(const void* ptr, bool* result) {
+ int dummy = 0;
+ *result = std::less<const void*>()(&dummy, ptr);
+}
+
+// Make sure AddressSanitizer does not tamper with the stack here.
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+static bool StackGrowsDown() {
+ int dummy = 0;
+ bool result;
+ StackLowerThanAddress(&dummy, &result);
+ return result;
+}
+#endif // GTEST_HAS_CLONE
+
+// Spawns a child process with the same executable as the current process in
+// a thread-safe manner and instructs it to run the death test. The
+// implementation uses fork(2) + exec. On systems where clone(2) is
+// available, it is used instead, being slightly more thread-safe. On QNX,
+// fork supports only single-threaded environments, so this function uses
+// spawn(2) there instead. The function dies with an error message if
+// anything goes wrong.
+static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) {
+ ExecDeathTestArgs args = {argv, close_fd};
+ pid_t child_pid = -1;
+
+#if GTEST_OS_QNX
+ // Obtains the current directory and sets it to be closed in the child
+ // process.
+ const int cwd_fd = open(".", O_RDONLY);
+ GTEST_DEATH_TEST_CHECK_(cwd_fd != -1);
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(cwd_fd, F_SETFD, FD_CLOEXEC));
+ // We need to execute the test program in the same environment where
+ // it was originally invoked. Therefore we change to the original
+ // working directory first.
+ const char* const original_dir =
+ UnitTest::GetInstance()->original_working_dir();
+ // We can safely call chdir() as it's a direct system call.
+ if (chdir(original_dir) != 0) {
+ DeathTestAbort(std::string("chdir(\"") + original_dir +
+ "\") failed: " + GetLastErrnoDescription());
+ return EXIT_FAILURE;
+ }
+
+ int fd_flags;
+ // Set close_fd to be closed after spawn.
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(fd_flags = fcntl(close_fd, F_GETFD));
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(
+ fcntl(close_fd, F_SETFD, fd_flags | FD_CLOEXEC));
+ struct inheritance inherit = {0};
+ // spawn is a system call.
+ child_pid = spawn(args.argv[0], 0, nullptr, &inherit, args.argv, environ);
+ // Restores the current working directory.
+ GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1);
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd));
+
+#else // GTEST_OS_QNX
+#if GTEST_OS_LINUX
+ // When a SIGPROF signal is received while fork() or clone() are executing,
+ // the process may hang. To avoid this, we ignore SIGPROF here and re-enable
+ // it after the call to fork()/clone() is complete.
+ struct sigaction saved_sigprof_action;
+ struct sigaction ignore_sigprof_action;
+ memset(&ignore_sigprof_action, 0, sizeof(ignore_sigprof_action));
+ sigemptyset(&ignore_sigprof_action.sa_mask);
+ ignore_sigprof_action.sa_handler = SIG_IGN;
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(
+ sigaction(SIGPROF, &ignore_sigprof_action, &saved_sigprof_action));
+#endif // GTEST_OS_LINUX
+
+#if GTEST_HAS_CLONE
+ const bool use_fork = GTEST_FLAG_GET(death_test_use_fork);
+
+ if (!use_fork) {
+ static const bool stack_grows_down = StackGrowsDown();
+ const auto stack_size = static_cast<size_t>(getpagesize() * 2);
+ // MMAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead.
+ void* const stack = mmap(nullptr, stack_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED);
+
+ // Maximum stack alignment in bytes: For a downward-growing stack, this
+ // amount is subtracted from size of the stack space to get an address
+ // that is within the stack space and is aligned on all systems we care
+ // about. As far as I know there is no ABI with stack alignment greater
+ // than 64. We assume stack and stack_size already have alignment of
+ // kMaxStackAlignment.
+ const size_t kMaxStackAlignment = 64;
+ void* const stack_top =
+ static_cast<char*>(stack) +
+ (stack_grows_down ? stack_size - kMaxStackAlignment : 0);
+ GTEST_DEATH_TEST_CHECK_(
+ static_cast<size_t>(stack_size) > kMaxStackAlignment &&
+ reinterpret_cast<uintptr_t>(stack_top) % kMaxStackAlignment == 0);
+
+ child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args);
+
+ GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1);
+ }
+#else
+ const bool use_fork = true;
+#endif // GTEST_HAS_CLONE
+
+ if (use_fork && (child_pid = fork()) == 0) {
+ ExecDeathTestChildMain(&args);
+ _exit(0);
+ }
+#endif // GTEST_OS_QNX
+#if GTEST_OS_LINUX
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(
+ sigaction(SIGPROF, &saved_sigprof_action, nullptr));
+#endif // GTEST_OS_LINUX
+
+ GTEST_DEATH_TEST_CHECK_(child_pid != -1);
+ return child_pid;
+}
+
+// The AssumeRole process for a fork-and-exec death test. It re-executes the
+// main program from the beginning, setting the --gtest_filter
+// and --gtest_internal_run_death_test flags to cause only the current
+// death test to be re-run.
+DeathTest::TestRole ExecDeathTest::AssumeRole() {
+ const UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
+ impl->internal_run_death_test_flag();
+ const TestInfo* const info = impl->current_test_info();
+ const int death_test_index = info->result()->death_test_count();
+
+ if (flag != nullptr) {
+ set_write_fd(flag->write_fd());
+ return EXECUTE_TEST;
+ }
+
+ int pipe_fd[2];
+ GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);
+ // Clear the close-on-exec flag on the write end of the pipe, lest
+ // it be closed when the child process does an exec:
+ GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1);
+
+ const std::string filter_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
+ "filter=" + info->test_suite_name() + "." +
+ info->name();
+ const std::string internal_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
+ "internal_run_death_test=" + file_ + "|" +
+ StreamableToString(line_) + "|" +
+ StreamableToString(death_test_index) + "|" +
+ StreamableToString(pipe_fd[1]);
+ Arguments args;
+ args.AddArguments(GetArgvsForDeathTestChildProcess());
+ args.AddArgument(filter_flag.c_str());
+ args.AddArgument(internal_flag.c_str());
+
+ DeathTest::set_last_death_test_message("");
+
+ CaptureStderr();
+ // See the comment in NoExecDeathTest::AssumeRole for why the next line
+ // is necessary.
+ FlushInfoLog();
+
+ const pid_t child_pid = ExecDeathTestSpawnChild(args.Argv(), pipe_fd[0]);
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
+ set_child_pid(child_pid);
+ set_read_fd(pipe_fd[0]);
+ set_spawned(true);
+ return OVERSEE_TEST;
+}
+
+#endif // !GTEST_OS_WINDOWS
+
+// Creates a concrete DeathTest-derived class that depends on the
+// --gtest_death_test_style flag, and sets the pointer pointed to
+// by the "test" argument to its address. If the test should be
+// skipped, sets that pointer to NULL. Returns true, unless the
+// flag is set to an invalid value.
+bool DefaultDeathTestFactory::Create(const char* statement,
+ Matcher<const std::string&> matcher,
+ const char* file, int line,
+ DeathTest** test) {
+ UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
+ impl->internal_run_death_test_flag();
+ const int death_test_index =
+ impl->current_test_info()->increment_death_test_count();
+
+ if (flag != nullptr) {
+ if (death_test_index > flag->index()) {
+ DeathTest::set_last_death_test_message(
+ "Death test count (" + StreamableToString(death_test_index) +
+ ") somehow exceeded expected maximum (" +
+ StreamableToString(flag->index()) + ")");
+ return false;
+ }
+
+ if (!(flag->file() == file && flag->line() == line &&
+ flag->index() == death_test_index)) {
+ *test = nullptr;
+ return true;
+ }
+ }
+
+#if GTEST_OS_WINDOWS
+
+ if (GTEST_FLAG_GET(death_test_style) == "threadsafe" ||
+ GTEST_FLAG_GET(death_test_style) == "fast") {
+ *test = new WindowsDeathTest(statement, std::move(matcher), file, line);
+ }
+
+#elif GTEST_OS_FUCHSIA
+
+ if (GTEST_FLAG_GET(death_test_style) == "threadsafe" ||
+ GTEST_FLAG_GET(death_test_style) == "fast") {
+ *test = new FuchsiaDeathTest(statement, std::move(matcher), file, line);
+ }
+
+#else
+
+ if (GTEST_FLAG_GET(death_test_style) == "threadsafe") {
+ *test = new ExecDeathTest(statement, std::move(matcher), file, line);
+ } else if (GTEST_FLAG_GET(death_test_style) == "fast") {
+ *test = new NoExecDeathTest(statement, std::move(matcher));
+ }
+
+#endif // GTEST_OS_WINDOWS
+
+ else { // NOLINT - this is more readable than unbalanced brackets inside #if.
+ DeathTest::set_last_death_test_message("Unknown death test style \"" +
+ GTEST_FLAG_GET(death_test_style) +
+ "\" encountered");
+ return false;
+ }
+
+ return true;
+}
+
+#if GTEST_OS_WINDOWS
+// Recreates the pipe and event handles from the provided parameters,
+// signals the event, and returns a file descriptor wrapped around the pipe
+// handle. This function is called in the child process only.
+static int GetStatusFileDescriptor(unsigned int parent_process_id,
+ size_t write_handle_as_size_t,
+ size_t event_handle_as_size_t) {
+ AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE,
+ FALSE, // Non-inheritable.
+ parent_process_id));
+ if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) {
+ DeathTestAbort("Unable to open parent process " +
+ StreamableToString(parent_process_id));
+ }
+
+ GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t));
+
+ const HANDLE write_handle = reinterpret_cast<HANDLE>(write_handle_as_size_t);
+ HANDLE dup_write_handle;
+
+ // The newly initialized handle is accessible only in the parent
+ // process. To obtain one accessible within the child, we need to use
+ // DuplicateHandle.
+ if (!::DuplicateHandle(parent_process_handle.Get(), write_handle,
+ ::GetCurrentProcess(), &dup_write_handle,
+ 0x0, // Requested privileges ignored since
+ // DUPLICATE_SAME_ACCESS is used.
+ FALSE, // Request non-inheritable handler.
+ DUPLICATE_SAME_ACCESS)) {
+ DeathTestAbort("Unable to duplicate the pipe handle " +
+ StreamableToString(write_handle_as_size_t) +
+ " from the parent process " +
+ StreamableToString(parent_process_id));
+ }
+
+ const HANDLE event_handle = reinterpret_cast<HANDLE>(event_handle_as_size_t);
+ HANDLE dup_event_handle;
+
+ if (!::DuplicateHandle(parent_process_handle.Get(), event_handle,
+ ::GetCurrentProcess(), &dup_event_handle, 0x0, FALSE,
+ DUPLICATE_SAME_ACCESS)) {
+ DeathTestAbort("Unable to duplicate the event handle " +
+ StreamableToString(event_handle_as_size_t) +
+ " from the parent process " +
+ StreamableToString(parent_process_id));
+ }
+
+ const int write_fd =
+ ::_open_osfhandle(reinterpret_cast<intptr_t>(dup_write_handle), O_APPEND);
+ if (write_fd == -1) {
+ DeathTestAbort("Unable to convert pipe handle " +
+ StreamableToString(write_handle_as_size_t) +
+ " to a file descriptor");
+ }
+
+ // Signals the parent that the write end of the pipe has been acquired
+ // so the parent can release its own write end.
+ ::SetEvent(dup_event_handle);
+
+ return write_fd;
+}
+#endif // GTEST_OS_WINDOWS
+
+// Returns a newly created InternalRunDeathTestFlag object with fields
+// initialized from the GTEST_FLAG(internal_run_death_test) flag if
+// the flag is specified; otherwise returns NULL.
+InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
+ if (GTEST_FLAG_GET(internal_run_death_test) == "") return nullptr;
+
+ // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we
+ // can use it here.
+ int line = -1;
+ int index = -1;
+ ::std::vector< ::std::string> fields;
+ SplitString(GTEST_FLAG_GET(internal_run_death_test), '|', &fields);
+ int write_fd = -1;
+
+#if GTEST_OS_WINDOWS
+
+ unsigned int parent_process_id = 0;
+ size_t write_handle_as_size_t = 0;
+ size_t event_handle_as_size_t = 0;
+
+ if (fields.size() != 6 || !ParseNaturalNumber(fields[1], &line) ||
+ !ParseNaturalNumber(fields[2], &index) ||
+ !ParseNaturalNumber(fields[3], &parent_process_id) ||
+ !ParseNaturalNumber(fields[4], &write_handle_as_size_t) ||
+ !ParseNaturalNumber(fields[5], &event_handle_as_size_t)) {
+ DeathTestAbort("Bad --gtest_internal_run_death_test flag: " +
+ GTEST_FLAG_GET(internal_run_death_test));
+ }
+ write_fd = GetStatusFileDescriptor(parent_process_id, write_handle_as_size_t,
+ event_handle_as_size_t);
+
+#elif GTEST_OS_FUCHSIA
+
+ if (fields.size() != 3 || !ParseNaturalNumber(fields[1], &line) ||
+ !ParseNaturalNumber(fields[2], &index)) {
+ DeathTestAbort("Bad --gtest_internal_run_death_test flag: " +
+ GTEST_FLAG_GET(internal_run_death_test));
+ }
+
+#else
+
+ if (fields.size() != 4 || !ParseNaturalNumber(fields[1], &line) ||
+ !ParseNaturalNumber(fields[2], &index) ||
+ !ParseNaturalNumber(fields[3], &write_fd)) {
+ DeathTestAbort("Bad --gtest_internal_run_death_test flag: " +
+ GTEST_FLAG_GET(internal_run_death_test));
+ }
+
+#endif // GTEST_OS_WINDOWS
+
+ return new InternalRunDeathTestFlag(fields[0], line, index, write_fd);
+}
+
+} // namespace internal
+
+#endif // GTEST_HAS_DEATH_TEST
+
+} // namespace testing
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-filepath.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-filepath.cc
new file mode 100644
index 0000000000..f6ee90cdb7
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-filepath.cc
@@ -0,0 +1,367 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "gtest/internal/gtest-filepath.h"
+
+#include <stdlib.h>
+
+#include "gtest/gtest-message.h"
+#include "gtest/internal/gtest-port.h"
+
+#if GTEST_OS_WINDOWS_MOBILE
+#include <windows.h>
+#elif GTEST_OS_WINDOWS
+#include <direct.h>
+#include <io.h>
+#else
+#include <limits.h>
+
+#include <climits> // Some Linux distributions define PATH_MAX here.
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+#include "gtest/internal/gtest-string.h"
+
+#if GTEST_OS_WINDOWS
+#define GTEST_PATH_MAX_ _MAX_PATH
+#elif defined(PATH_MAX)
+#define GTEST_PATH_MAX_ PATH_MAX
+#elif defined(_XOPEN_PATH_MAX)
+#define GTEST_PATH_MAX_ _XOPEN_PATH_MAX
+#else
+#define GTEST_PATH_MAX_ _POSIX_PATH_MAX
+#endif // GTEST_OS_WINDOWS
+
+namespace testing {
+namespace internal {
+
+#if GTEST_OS_WINDOWS
+// On Windows, '\\' is the standard path separator, but many tools and the
+// Windows API also accept '/' as an alternate path separator. Unless otherwise
+// noted, a file path can contain either kind of path separators, or a mixture
+// of them.
+const char kPathSeparator = '\\';
+const char kAlternatePathSeparator = '/';
+const char kAlternatePathSeparatorString[] = "/";
+#if GTEST_OS_WINDOWS_MOBILE
+// Windows CE doesn't have a current directory. You should not use
+// the current directory in tests on Windows CE, but this at least
+// provides a reasonable fallback.
+const char kCurrentDirectoryString[] = "\\";
+// Windows CE doesn't define INVALID_FILE_ATTRIBUTES
+const DWORD kInvalidFileAttributes = 0xffffffff;
+#else
+const char kCurrentDirectoryString[] = ".\\";
+#endif // GTEST_OS_WINDOWS_MOBILE
+#else
+const char kPathSeparator = '/';
+const char kCurrentDirectoryString[] = "./";
+#endif // GTEST_OS_WINDOWS
+
+// Returns whether the given character is a valid path separator.
+static bool IsPathSeparator(char c) {
+#if GTEST_HAS_ALT_PATH_SEP_
+ return (c == kPathSeparator) || (c == kAlternatePathSeparator);
+#else
+ return c == kPathSeparator;
+#endif
+}
+
+// Returns the current working directory, or "" if unsuccessful.
+FilePath FilePath::GetCurrentDir() {
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
+ GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_ESP32 || \
+ GTEST_OS_XTENSA
+ // These platforms do not have a current directory, so we just return
+ // something reasonable.
+ return FilePath(kCurrentDirectoryString);
+#elif GTEST_OS_WINDOWS
+ char cwd[GTEST_PATH_MAX_ + 1] = {'\0'};
+ return FilePath(_getcwd(cwd, sizeof(cwd)) == nullptr ? "" : cwd);
+#else
+ char cwd[GTEST_PATH_MAX_ + 1] = {'\0'};
+ char* result = getcwd(cwd, sizeof(cwd));
+#if GTEST_OS_NACL
+ // getcwd will likely fail in NaCl due to the sandbox, so return something
+ // reasonable. The user may have provided a shim implementation for getcwd,
+ // however, so fallback only when failure is detected.
+ return FilePath(result == nullptr ? kCurrentDirectoryString : cwd);
+#endif // GTEST_OS_NACL
+ return FilePath(result == nullptr ? "" : cwd);
+#endif // GTEST_OS_WINDOWS_MOBILE
+}
+
+// Returns a copy of the FilePath with the case-insensitive extension removed.
+// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
+// FilePath("dir/file"). If a case-insensitive extension is not
+// found, returns a copy of the original FilePath.
+FilePath FilePath::RemoveExtension(const char* extension) const {
+ const std::string dot_extension = std::string(".") + extension;
+ if (String::EndsWithCaseInsensitive(pathname_, dot_extension)) {
+ return FilePath(
+ pathname_.substr(0, pathname_.length() - dot_extension.length()));
+ }
+ return *this;
+}
+
+// Returns a pointer to the last occurrence of a valid path separator in
+// the FilePath. On Windows, for example, both '/' and '\' are valid path
+// separators. Returns NULL if no path separator was found.
+const char* FilePath::FindLastPathSeparator() const {
+ const char* const last_sep = strrchr(c_str(), kPathSeparator);
+#if GTEST_HAS_ALT_PATH_SEP_
+ const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator);
+ // Comparing two pointers of which only one is NULL is undefined.
+ if (last_alt_sep != nullptr &&
+ (last_sep == nullptr || last_alt_sep > last_sep)) {
+ return last_alt_sep;
+ }
+#endif
+ return last_sep;
+}
+
+// Returns a copy of the FilePath with the directory part removed.
+// Example: FilePath("path/to/file").RemoveDirectoryName() returns
+// FilePath("file"). If there is no directory part ("just_a_file"), it returns
+// the FilePath unmodified. If there is no file part ("just_a_dir/") it
+// returns an empty FilePath ("").
+// On Windows platform, '\' is the path separator, otherwise it is '/'.
+FilePath FilePath::RemoveDirectoryName() const {
+ const char* const last_sep = FindLastPathSeparator();
+ return last_sep ? FilePath(last_sep + 1) : *this;
+}
+
+// RemoveFileName returns the directory path with the filename removed.
+// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
+// If the FilePath is "a_file" or "/a_file", RemoveFileName returns
+// FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
+// not have a file, like "just/a/dir/", it returns the FilePath unmodified.
+// On Windows platform, '\' is the path separator, otherwise it is '/'.
+FilePath FilePath::RemoveFileName() const {
+ const char* const last_sep = FindLastPathSeparator();
+ std::string dir;
+ if (last_sep) {
+ dir = std::string(c_str(), static_cast<size_t>(last_sep + 1 - c_str()));
+ } else {
+ dir = kCurrentDirectoryString;
+ }
+ return FilePath(dir);
+}
+
+// Helper functions for naming files in a directory for xml output.
+
+// Given directory = "dir", base_name = "test", number = 0,
+// extension = "xml", returns "dir/test.xml". If number is greater
+// than zero (e.g., 12), returns "dir/test_12.xml".
+// On Windows platform, uses \ as the separator rather than /.
+FilePath FilePath::MakeFileName(const FilePath& directory,
+ const FilePath& base_name, int number,
+ const char* extension) {
+ std::string file;
+ if (number == 0) {
+ file = base_name.string() + "." + extension;
+ } else {
+ file =
+ base_name.string() + "_" + StreamableToString(number) + "." + extension;
+ }
+ return ConcatPaths(directory, FilePath(file));
+}
+
+// Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml".
+// On Windows, uses \ as the separator rather than /.
+FilePath FilePath::ConcatPaths(const FilePath& directory,
+ const FilePath& relative_path) {
+ if (directory.IsEmpty()) return relative_path;
+ const FilePath dir(directory.RemoveTrailingPathSeparator());
+ return FilePath(dir.string() + kPathSeparator + relative_path.string());
+}
+
+// Returns true if pathname describes something findable in the file-system,
+// either a file, directory, or whatever.
+bool FilePath::FileOrDirectoryExists() const {
+#if GTEST_OS_WINDOWS_MOBILE
+ LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str());
+ const DWORD attributes = GetFileAttributes(unicode);
+ delete[] unicode;
+ return attributes != kInvalidFileAttributes;
+#else
+ posix::StatStruct file_stat{};
+ return posix::Stat(pathname_.c_str(), &file_stat) == 0;
+#endif // GTEST_OS_WINDOWS_MOBILE
+}
+
+// Returns true if pathname describes a directory in the file-system
+// that exists.
+bool FilePath::DirectoryExists() const {
+ bool result = false;
+#if GTEST_OS_WINDOWS
+ // Don't strip off trailing separator if path is a root directory on
+ // Windows (like "C:\\").
+ const FilePath& path(IsRootDirectory() ? *this
+ : RemoveTrailingPathSeparator());
+#else
+ const FilePath& path(*this);
+#endif
+
+#if GTEST_OS_WINDOWS_MOBILE
+ LPCWSTR unicode = String::AnsiToUtf16(path.c_str());
+ const DWORD attributes = GetFileAttributes(unicode);
+ delete[] unicode;
+ if ((attributes != kInvalidFileAttributes) &&
+ (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
+ result = true;
+ }
+#else
+ posix::StatStruct file_stat{};
+ result =
+ posix::Stat(path.c_str(), &file_stat) == 0 && posix::IsDir(file_stat);
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+ return result;
+}
+
+// Returns true if pathname describes a root directory. (Windows has one
+// root directory per disk drive.)
+bool FilePath::IsRootDirectory() const {
+#if GTEST_OS_WINDOWS
+ return pathname_.length() == 3 && IsAbsolutePath();
+#else
+ return pathname_.length() == 1 && IsPathSeparator(pathname_.c_str()[0]);
+#endif
+}
+
+// Returns true if pathname describes an absolute path.
+bool FilePath::IsAbsolutePath() const {
+ const char* const name = pathname_.c_str();
+#if GTEST_OS_WINDOWS
+ return pathname_.length() >= 3 &&
+ ((name[0] >= 'a' && name[0] <= 'z') ||
+ (name[0] >= 'A' && name[0] <= 'Z')) &&
+ name[1] == ':' && IsPathSeparator(name[2]);
+#else
+ return IsPathSeparator(name[0]);
+#endif
+}
+
+// Returns a pathname for a file that does not currently exist. The pathname
+// will be directory/base_name.extension or
+// directory/base_name_<number>.extension if directory/base_name.extension
+// already exists. The number will be incremented until a pathname is found
+// that does not already exist.
+// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
+// There could be a race condition if two or more processes are calling this
+// function at the same time -- they could both pick the same filename.
+FilePath FilePath::GenerateUniqueFileName(const FilePath& directory,
+ const FilePath& base_name,
+ const char* extension) {
+ FilePath full_pathname;
+ int number = 0;
+ do {
+ full_pathname.Set(MakeFileName(directory, base_name, number++, extension));
+ } while (full_pathname.FileOrDirectoryExists());
+ return full_pathname;
+}
+
+// Returns true if FilePath ends with a path separator, which indicates that
+// it is intended to represent a directory. Returns false otherwise.
+// This does NOT check that a directory (or file) actually exists.
+bool FilePath::IsDirectory() const {
+ return !pathname_.empty() &&
+ IsPathSeparator(pathname_.c_str()[pathname_.length() - 1]);
+}
+
+// Create directories so that path exists. Returns true if successful or if
+// the directories already exist; returns false if unable to create directories
+// for any reason.
+bool FilePath::CreateDirectoriesRecursively() const {
+ if (!this->IsDirectory()) {
+ return false;
+ }
+
+ if (pathname_.length() == 0 || this->DirectoryExists()) {
+ return true;
+ }
+
+ const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName());
+ return parent.CreateDirectoriesRecursively() && this->CreateFolder();
+}
+
+// Create the directory so that path exists. Returns true if successful or
+// if the directory already exists; returns false if unable to create the
+// directory for any reason, including if the parent directory does not
+// exist. Not named "CreateDirectory" because that's a macro on Windows.
+bool FilePath::CreateFolder() const {
+#if GTEST_OS_WINDOWS_MOBILE
+ FilePath removed_sep(this->RemoveTrailingPathSeparator());
+ LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str());
+ int result = CreateDirectory(unicode, nullptr) ? 0 : -1;
+ delete[] unicode;
+#elif GTEST_OS_WINDOWS
+ int result = _mkdir(pathname_.c_str());
+#elif GTEST_OS_ESP8266 || GTEST_OS_XTENSA
+ // do nothing
+ int result = 0;
+#else
+ int result = mkdir(pathname_.c_str(), 0777);
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+ if (result == -1) {
+ return this->DirectoryExists(); // An error is OK if the directory exists.
+ }
+ return true; // No error.
+}
+
+// If input name has a trailing separator character, remove it and return the
+// name, otherwise return the name string unmodified.
+// On Windows platform, uses \ as the separator, other platforms use /.
+FilePath FilePath::RemoveTrailingPathSeparator() const {
+ return IsDirectory() ? FilePath(pathname_.substr(0, pathname_.length() - 1))
+ : *this;
+}
+
+// Removes any redundant separators that might be in the pathname.
+// For example, "bar///foo" becomes "bar/foo". Does not eliminate other
+// redundancies that might be in a pathname involving "." or "..".
+void FilePath::Normalize() {
+ auto out = pathname_.begin();
+
+ for (const char character : pathname_) {
+ if (!IsPathSeparator(character)) {
+ *(out++) = character;
+ } else if (out == pathname_.begin() || *std::prev(out) != kPathSeparator) {
+ *(out++) = kPathSeparator;
+ } else {
+ continue;
+ }
+ }
+
+ pathname_.erase(out, pathname_.end());
+}
+
+} // namespace internal
+} // namespace testing
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-internal-inl.h b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-internal-inl.h
new file mode 100644
index 0000000000..0b9e929c68
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-internal-inl.h
@@ -0,0 +1,1212 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Utility functions and classes used by the Google C++ testing framework.//
+// This file contains purely Google Test's internal implementation. Please
+// DO NOT #INCLUDE IT IN A USER PROGRAM.
+
+#ifndef GOOGLETEST_SRC_GTEST_INTERNAL_INL_H_
+#define GOOGLETEST_SRC_GTEST_INTERNAL_INL_H_
+
+#ifndef _WIN32_WCE
+#include <errno.h>
+#endif // !_WIN32_WCE
+#include <stddef.h>
+#include <stdlib.h> // For strtoll/_strtoul64/malloc/free.
+#include <string.h> // For memmove.
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/internal/gtest-port.h"
+
+#if GTEST_CAN_STREAM_RESULTS_
+#include <arpa/inet.h> // NOLINT
+#include <netdb.h> // NOLINT
+#endif
+
+#if GTEST_OS_WINDOWS
+#include <windows.h> // NOLINT
+#endif // GTEST_OS_WINDOWS
+
+#include "gtest/gtest-spi.h"
+#include "gtest/gtest.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// Declares the flags.
+//
+// We don't want the users to modify this flag in the code, but want
+// Google Test's own unit tests to be able to access it. Therefore we
+// declare it here as opposed to in gtest.h.
+GTEST_DECLARE_bool_(death_test_use_fork);
+
+namespace testing {
+namespace internal {
+
+// The value of GetTestTypeId() as seen from within the Google Test
+// library. This is solely for testing GetTestTypeId().
+GTEST_API_ extern const TypeId kTestTypeIdInGoogleTest;
+
+// A valid random seed must be in [1, kMaxRandomSeed].
+const int kMaxRandomSeed = 99999;
+
+// g_help_flag is true if and only if the --help flag or an equivalent form
+// is specified on the command line.
+GTEST_API_ extern bool g_help_flag;
+
+// Returns the current time in milliseconds.
+GTEST_API_ TimeInMillis GetTimeInMillis();
+
+// Returns true if and only if Google Test should use colors in the output.
+GTEST_API_ bool ShouldUseColor(bool stdout_is_tty);
+
+// Formats the given time in milliseconds as seconds.
+GTEST_API_ std::string FormatTimeInMillisAsSeconds(TimeInMillis ms);
+
+// Converts the given time in milliseconds to a date string in the ISO 8601
+// format, without the timezone information. N.B.: due to the use the
+// non-reentrant localtime() function, this function is not thread safe. Do
+// not use it in any code that can be called from multiple threads.
+GTEST_API_ std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms);
+
+// Parses a string for an Int32 flag, in the form of "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+GTEST_API_ bool ParseFlag(const char* str, const char* flag, int32_t* value);
+
+// Returns a random seed in range [1, kMaxRandomSeed] based on the
+// given --gtest_random_seed flag value.
+inline int GetRandomSeedFromFlag(int32_t random_seed_flag) {
+ const unsigned int raw_seed =
+ (random_seed_flag == 0) ? static_cast<unsigned int>(GetTimeInMillis())
+ : static_cast<unsigned int>(random_seed_flag);
+
+ // Normalizes the actual seed to range [1, kMaxRandomSeed] such that
+ // it's easy to type.
+ const int normalized_seed =
+ static_cast<int>((raw_seed - 1U) %
+ static_cast<unsigned int>(kMaxRandomSeed)) +
+ 1;
+ return normalized_seed;
+}
+
+// Returns the first valid random seed after 'seed'. The behavior is
+// undefined if 'seed' is invalid. The seed after kMaxRandomSeed is
+// considered to be 1.
+inline int GetNextRandomSeed(int seed) {
+ GTEST_CHECK_(1 <= seed && seed <= kMaxRandomSeed)
+ << "Invalid random seed " << seed << " - must be in [1, "
+ << kMaxRandomSeed << "].";
+ const int next_seed = seed + 1;
+ return (next_seed > kMaxRandomSeed) ? 1 : next_seed;
+}
+
+// This class saves the values of all Google Test flags in its c'tor, and
+// restores them in its d'tor.
+class GTestFlagSaver {
+ public:
+ // The c'tor.
+ GTestFlagSaver() {
+ also_run_disabled_tests_ = GTEST_FLAG_GET(also_run_disabled_tests);
+ break_on_failure_ = GTEST_FLAG_GET(break_on_failure);
+ catch_exceptions_ = GTEST_FLAG_GET(catch_exceptions);
+ color_ = GTEST_FLAG_GET(color);
+ death_test_style_ = GTEST_FLAG_GET(death_test_style);
+ death_test_use_fork_ = GTEST_FLAG_GET(death_test_use_fork);
+ fail_fast_ = GTEST_FLAG_GET(fail_fast);
+ filter_ = GTEST_FLAG_GET(filter);
+ internal_run_death_test_ = GTEST_FLAG_GET(internal_run_death_test);
+ list_tests_ = GTEST_FLAG_GET(list_tests);
+ output_ = GTEST_FLAG_GET(output);
+ brief_ = GTEST_FLAG_GET(brief);
+ print_time_ = GTEST_FLAG_GET(print_time);
+ print_utf8_ = GTEST_FLAG_GET(print_utf8);
+ random_seed_ = GTEST_FLAG_GET(random_seed);
+ repeat_ = GTEST_FLAG_GET(repeat);
+ recreate_environments_when_repeating_ =
+ GTEST_FLAG_GET(recreate_environments_when_repeating);
+ shuffle_ = GTEST_FLAG_GET(shuffle);
+ stack_trace_depth_ = GTEST_FLAG_GET(stack_trace_depth);
+ stream_result_to_ = GTEST_FLAG_GET(stream_result_to);
+ throw_on_failure_ = GTEST_FLAG_GET(throw_on_failure);
+ }
+
+ // The d'tor is not virtual. DO NOT INHERIT FROM THIS CLASS.
+ ~GTestFlagSaver() {
+ GTEST_FLAG_SET(also_run_disabled_tests, also_run_disabled_tests_);
+ GTEST_FLAG_SET(break_on_failure, break_on_failure_);
+ GTEST_FLAG_SET(catch_exceptions, catch_exceptions_);
+ GTEST_FLAG_SET(color, color_);
+ GTEST_FLAG_SET(death_test_style, death_test_style_);
+ GTEST_FLAG_SET(death_test_use_fork, death_test_use_fork_);
+ GTEST_FLAG_SET(filter, filter_);
+ GTEST_FLAG_SET(fail_fast, fail_fast_);
+ GTEST_FLAG_SET(internal_run_death_test, internal_run_death_test_);
+ GTEST_FLAG_SET(list_tests, list_tests_);
+ GTEST_FLAG_SET(output, output_);
+ GTEST_FLAG_SET(brief, brief_);
+ GTEST_FLAG_SET(print_time, print_time_);
+ GTEST_FLAG_SET(print_utf8, print_utf8_);
+ GTEST_FLAG_SET(random_seed, random_seed_);
+ GTEST_FLAG_SET(repeat, repeat_);
+ GTEST_FLAG_SET(recreate_environments_when_repeating,
+ recreate_environments_when_repeating_);
+ GTEST_FLAG_SET(shuffle, shuffle_);
+ GTEST_FLAG_SET(stack_trace_depth, stack_trace_depth_);
+ GTEST_FLAG_SET(stream_result_to, stream_result_to_);
+ GTEST_FLAG_SET(throw_on_failure, throw_on_failure_);
+ }
+
+ private:
+ // Fields for saving the original values of flags.
+ bool also_run_disabled_tests_;
+ bool break_on_failure_;
+ bool catch_exceptions_;
+ std::string color_;
+ std::string death_test_style_;
+ bool death_test_use_fork_;
+ bool fail_fast_;
+ std::string filter_;
+ std::string internal_run_death_test_;
+ bool list_tests_;
+ std::string output_;
+ bool brief_;
+ bool print_time_;
+ bool print_utf8_;
+ int32_t random_seed_;
+ int32_t repeat_;
+ bool recreate_environments_when_repeating_;
+ bool shuffle_;
+ int32_t stack_trace_depth_;
+ std::string stream_result_to_;
+ bool throw_on_failure_;
+} GTEST_ATTRIBUTE_UNUSED_;
+
+// Converts a Unicode code point to a narrow string in UTF-8 encoding.
+// code_point parameter is of type UInt32 because wchar_t may not be
+// wide enough to contain a code point.
+// If the code_point is not a valid Unicode code point
+// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
+// to "(Invalid Unicode 0xXXXXXXXX)".
+GTEST_API_ std::string CodePointToUtf8(uint32_t code_point);
+
+// Converts a wide string to a narrow string in UTF-8 encoding.
+// The wide string is assumed to have the following encoding:
+// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin)
+// UTF-32 if sizeof(wchar_t) == 4 (on Linux)
+// Parameter str points to a null-terminated wide string.
+// Parameter num_chars may additionally limit the number
+// of wchar_t characters processed. -1 is used when the entire string
+// should be processed.
+// If the string contains code points that are not valid Unicode code points
+// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
+// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
+// and contains invalid UTF-16 surrogate pairs, values in those pairs
+// will be encoded as individual Unicode characters from Basic Normal Plane.
+GTEST_API_ std::string WideStringToUtf8(const wchar_t* str, int num_chars);
+
+// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
+// if the variable is present. If a file already exists at this location, this
+// function will write over it. If the variable is present, but the file cannot
+// be created, prints an error and exits.
+void WriteToShardStatusFileIfNeeded();
+
+// Checks whether sharding is enabled by examining the relevant
+// environment variable values. If the variables are present,
+// but inconsistent (e.g., shard_index >= total_shards), prints
+// an error and exits. If in_subprocess_for_death_test, sharding is
+// disabled because it must only be applied to the original test
+// process. Otherwise, we could filter out death tests we intended to execute.
+GTEST_API_ bool ShouldShard(const char* total_shards_str,
+ const char* shard_index_str,
+ bool in_subprocess_for_death_test);
+
+// Parses the environment variable var as a 32-bit integer. If it is unset,
+// returns default_val. If it is not a 32-bit integer, prints an error and
+// and aborts.
+GTEST_API_ int32_t Int32FromEnvOrDie(const char* env_var, int32_t default_val);
+
+// Given the total number of shards, the shard index, and the test id,
+// returns true if and only if the test should be run on this shard. The test id
+// is some arbitrary but unique non-negative integer assigned to each test
+// method. Assumes that 0 <= shard_index < total_shards.
+GTEST_API_ bool ShouldRunTestOnShard(int total_shards, int shard_index,
+ int test_id);
+
+// STL container utilities.
+
+// Returns the number of elements in the given container that satisfy
+// the given predicate.
+template <class Container, typename Predicate>
+inline int CountIf(const Container& c, Predicate predicate) {
+ // Implemented as an explicit loop since std::count_if() in libCstd on
+ // Solaris has a non-standard signature.
+ int count = 0;
+ for (auto it = c.begin(); it != c.end(); ++it) {
+ if (predicate(*it)) ++count;
+ }
+ return count;
+}
+
+// Applies a function/functor to each element in the container.
+template <class Container, typename Functor>
+void ForEach(const Container& c, Functor functor) {
+ std::for_each(c.begin(), c.end(), functor);
+}
+
+// Returns the i-th element of the vector, or default_value if i is not
+// in range [0, v.size()).
+template <typename E>
+inline E GetElementOr(const std::vector<E>& v, int i, E default_value) {
+ return (i < 0 || i >= static_cast<int>(v.size())) ? default_value
+ : v[static_cast<size_t>(i)];
+}
+
+// Performs an in-place shuffle of a range of the vector's elements.
+// 'begin' and 'end' are element indices as an STL-style range;
+// i.e. [begin, end) are shuffled, where 'end' == size() means to
+// shuffle to the end of the vector.
+template <typename E>
+void ShuffleRange(internal::Random* random, int begin, int end,
+ std::vector<E>* v) {
+ const int size = static_cast<int>(v->size());
+ GTEST_CHECK_(0 <= begin && begin <= size)
+ << "Invalid shuffle range start " << begin << ": must be in range [0, "
+ << size << "].";
+ GTEST_CHECK_(begin <= end && end <= size)
+ << "Invalid shuffle range finish " << end << ": must be in range ["
+ << begin << ", " << size << "].";
+
+ // Fisher-Yates shuffle, from
+ // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
+ for (int range_width = end - begin; range_width >= 2; range_width--) {
+ const int last_in_range = begin + range_width - 1;
+ const int selected =
+ begin +
+ static_cast<int>(random->Generate(static_cast<uint32_t>(range_width)));
+ std::swap((*v)[static_cast<size_t>(selected)],
+ (*v)[static_cast<size_t>(last_in_range)]);
+ }
+}
+
+// Performs an in-place shuffle of the vector's elements.
+template <typename E>
+inline void Shuffle(internal::Random* random, std::vector<E>* v) {
+ ShuffleRange(random, 0, static_cast<int>(v->size()), v);
+}
+
+// A function for deleting an object. Handy for being used as a
+// functor.
+template <typename T>
+static void Delete(T* x) {
+ delete x;
+}
+
+// A predicate that checks the key of a TestProperty against a known key.
+//
+// TestPropertyKeyIs is copyable.
+class TestPropertyKeyIs {
+ public:
+ // Constructor.
+ //
+ // TestPropertyKeyIs has NO default constructor.
+ explicit TestPropertyKeyIs(const std::string& key) : key_(key) {}
+
+ // Returns true if and only if the test name of test property matches on key_.
+ bool operator()(const TestProperty& test_property) const {
+ return test_property.key() == key_;
+ }
+
+ private:
+ std::string key_;
+};
+
+// Class UnitTestOptions.
+//
+// This class contains functions for processing options the user
+// specifies when running the tests. It has only static members.
+//
+// In most cases, the user can specify an option using either an
+// environment variable or a command line flag. E.g. you can set the
+// test filter using either GTEST_FILTER or --gtest_filter. If both
+// the variable and the flag are present, the latter overrides the
+// former.
+class GTEST_API_ UnitTestOptions {
+ public:
+ // Functions for processing the gtest_output flag.
+
+ // Returns the output format, or "" for normal printed output.
+ static std::string GetOutputFormat();
+
+ // Returns the absolute path of the requested output file, or the
+ // default (test_detail.xml in the original working directory) if
+ // none was explicitly specified.
+ static std::string GetAbsolutePathToOutputFile();
+
+ // Functions for processing the gtest_filter flag.
+
+ // Returns true if and only if the user-specified filter matches the test
+ // suite name and the test name.
+ static bool FilterMatchesTest(const std::string& test_suite_name,
+ const std::string& test_name);
+
+#if GTEST_OS_WINDOWS
+ // Function for supporting the gtest_catch_exception flag.
+
+ // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the
+ // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise.
+ // This function is useful as an __except condition.
+ static int GTestShouldProcessSEH(DWORD exception_code);
+#endif // GTEST_OS_WINDOWS
+
+ // Returns true if "name" matches the ':' separated list of glob-style
+ // filters in "filter".
+ static bool MatchesFilter(const std::string& name, const char* filter);
+};
+
+// Returns the current application's name, removing directory path if that
+// is present. Used by UnitTestOptions::GetOutputFile.
+GTEST_API_ FilePath GetCurrentExecutableName();
+
+// The role interface for getting the OS stack trace as a string.
+class OsStackTraceGetterInterface {
+ public:
+ OsStackTraceGetterInterface() {}
+ virtual ~OsStackTraceGetterInterface() {}
+
+ // Returns the current OS stack trace as an std::string. Parameters:
+ //
+ // max_depth - the maximum number of stack frames to be included
+ // in the trace.
+ // skip_count - the number of top frames to be skipped; doesn't count
+ // against max_depth.
+ virtual std::string CurrentStackTrace(int max_depth, int skip_count) = 0;
+
+ // UponLeavingGTest() should be called immediately before Google Test calls
+ // user code. It saves some information about the current stack that
+ // CurrentStackTrace() will use to find and hide Google Test stack frames.
+ virtual void UponLeavingGTest() = 0;
+
+ // This string is inserted in place of stack frames that are part of
+ // Google Test's implementation.
+ static const char* const kElidedFramesMarker;
+
+ private:
+ OsStackTraceGetterInterface(const OsStackTraceGetterInterface&) = delete;
+ OsStackTraceGetterInterface& operator=(const OsStackTraceGetterInterface&) =
+ delete;
+};
+
+// A working implementation of the OsStackTraceGetterInterface interface.
+class OsStackTraceGetter : public OsStackTraceGetterInterface {
+ public:
+ OsStackTraceGetter() {}
+
+ std::string CurrentStackTrace(int max_depth, int skip_count) override;
+ void UponLeavingGTest() override;
+
+ private:
+#if GTEST_HAS_ABSL
+ Mutex mutex_; // Protects all internal state.
+
+ // We save the stack frame below the frame that calls user code.
+ // We do this because the address of the frame immediately below
+ // the user code changes between the call to UponLeavingGTest()
+ // and any calls to the stack trace code from within the user code.
+ void* caller_frame_ = nullptr;
+#endif // GTEST_HAS_ABSL
+
+ OsStackTraceGetter(const OsStackTraceGetter&) = delete;
+ OsStackTraceGetter& operator=(const OsStackTraceGetter&) = delete;
+};
+
+// Information about a Google Test trace point.
+struct TraceInfo {
+ const char* file;
+ int line;
+ std::string message;
+};
+
+// This is the default global test part result reporter used in UnitTestImpl.
+// This class should only be used by UnitTestImpl.
+class DefaultGlobalTestPartResultReporter
+ : public TestPartResultReporterInterface {
+ public:
+ explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test);
+ // Implements the TestPartResultReporterInterface. Reports the test part
+ // result in the current test.
+ void ReportTestPartResult(const TestPartResult& result) override;
+
+ private:
+ UnitTestImpl* const unit_test_;
+
+ DefaultGlobalTestPartResultReporter(
+ const DefaultGlobalTestPartResultReporter&) = delete;
+ DefaultGlobalTestPartResultReporter& operator=(
+ const DefaultGlobalTestPartResultReporter&) = delete;
+};
+
+// This is the default per thread test part result reporter used in
+// UnitTestImpl. This class should only be used by UnitTestImpl.
+class DefaultPerThreadTestPartResultReporter
+ : public TestPartResultReporterInterface {
+ public:
+ explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test);
+ // Implements the TestPartResultReporterInterface. The implementation just
+ // delegates to the current global test part result reporter of *unit_test_.
+ void ReportTestPartResult(const TestPartResult& result) override;
+
+ private:
+ UnitTestImpl* const unit_test_;
+
+ DefaultPerThreadTestPartResultReporter(
+ const DefaultPerThreadTestPartResultReporter&) = delete;
+ DefaultPerThreadTestPartResultReporter& operator=(
+ const DefaultPerThreadTestPartResultReporter&) = delete;
+};
+
+// The private implementation of the UnitTest class. We don't protect
+// the methods under a mutex, as this class is not accessible by a
+// user and the UnitTest class that delegates work to this class does
+// proper locking.
+class GTEST_API_ UnitTestImpl {
+ public:
+ explicit UnitTestImpl(UnitTest* parent);
+ virtual ~UnitTestImpl();
+
+ // There are two different ways to register your own TestPartResultReporter.
+ // You can register your own repoter to listen either only for test results
+ // from the current thread or for results from all threads.
+ // By default, each per-thread test result repoter just passes a new
+ // TestPartResult to the global test result reporter, which registers the
+ // test part result for the currently running test.
+
+ // Returns the global test part result reporter.
+ TestPartResultReporterInterface* GetGlobalTestPartResultReporter();
+
+ // Sets the global test part result reporter.
+ void SetGlobalTestPartResultReporter(
+ TestPartResultReporterInterface* reporter);
+
+ // Returns the test part result reporter for the current thread.
+ TestPartResultReporterInterface* GetTestPartResultReporterForCurrentThread();
+
+ // Sets the test part result reporter for the current thread.
+ void SetTestPartResultReporterForCurrentThread(
+ TestPartResultReporterInterface* reporter);
+
+ // Gets the number of successful test suites.
+ int successful_test_suite_count() const;
+
+ // Gets the number of failed test suites.
+ int failed_test_suite_count() const;
+
+ // Gets the number of all test suites.
+ int total_test_suite_count() const;
+
+ // Gets the number of all test suites that contain at least one test
+ // that should run.
+ int test_suite_to_run_count() const;
+
+ // Gets the number of successful tests.
+ int successful_test_count() const;
+
+ // Gets the number of skipped tests.
+ int skipped_test_count() const;
+
+ // Gets the number of failed tests.
+ int failed_test_count() const;
+
+ // Gets the number of disabled tests that will be reported in the XML report.
+ int reportable_disabled_test_count() const;
+
+ // Gets the number of disabled tests.
+ int disabled_test_count() const;
+
+ // Gets the number of tests to be printed in the XML report.
+ int reportable_test_count() const;
+
+ // Gets the number of all tests.
+ int total_test_count() const;
+
+ // Gets the number of tests that should run.
+ int test_to_run_count() const;
+
+ // Gets the time of the test program start, in ms from the start of the
+ // UNIX epoch.
+ TimeInMillis start_timestamp() const { return start_timestamp_; }
+
+ // Gets the elapsed time, in milliseconds.
+ TimeInMillis elapsed_time() const { return elapsed_time_; }
+
+ // Returns true if and only if the unit test passed (i.e. all test suites
+ // passed).
+ bool Passed() const { return !Failed(); }
+
+ // Returns true if and only if the unit test failed (i.e. some test suite
+ // failed or something outside of all tests failed).
+ bool Failed() const {
+ return failed_test_suite_count() > 0 || ad_hoc_test_result()->Failed();
+ }
+
+ // Gets the i-th test suite among all the test suites. i can range from 0 to
+ // total_test_suite_count() - 1. If i is not in that range, returns NULL.
+ const TestSuite* GetTestSuite(int i) const {
+ const int index = GetElementOr(test_suite_indices_, i, -1);
+ return index < 0 ? nullptr : test_suites_[static_cast<size_t>(i)];
+ }
+
+ // Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ const TestCase* GetTestCase(int i) const { return GetTestSuite(i); }
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ // Gets the i-th test suite among all the test suites. i can range from 0 to
+ // total_test_suite_count() - 1. If i is not in that range, returns NULL.
+ TestSuite* GetMutableSuiteCase(int i) {
+ const int index = GetElementOr(test_suite_indices_, i, -1);
+ return index < 0 ? nullptr : test_suites_[static_cast<size_t>(index)];
+ }
+
+ // Provides access to the event listener list.
+ TestEventListeners* listeners() { return &listeners_; }
+
+ // Returns the TestResult for the test that's currently running, or
+ // the TestResult for the ad hoc test if no test is running.
+ TestResult* current_test_result();
+
+ // Returns the TestResult for the ad hoc test.
+ const TestResult* ad_hoc_test_result() const { return &ad_hoc_test_result_; }
+
+ // Sets the OS stack trace getter.
+ //
+ // Does nothing if the input and the current OS stack trace getter
+ // are the same; otherwise, deletes the old getter and makes the
+ // input the current getter.
+ void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter);
+
+ // Returns the current OS stack trace getter if it is not NULL;
+ // otherwise, creates an OsStackTraceGetter, makes it the current
+ // getter, and returns it.
+ OsStackTraceGetterInterface* os_stack_trace_getter();
+
+ // Returns the current OS stack trace as an std::string.
+ //
+ // The maximum number of stack frames to be included is specified by
+ // the gtest_stack_trace_depth flag. The skip_count parameter
+ // specifies the number of top frames to be skipped, which doesn't
+ // count against the number of frames to be included.
+ //
+ // For example, if Foo() calls Bar(), which in turn calls
+ // CurrentOsStackTraceExceptTop(1), Foo() will be included in the
+ // trace but Bar() and CurrentOsStackTraceExceptTop() won't.
+ std::string CurrentOsStackTraceExceptTop(int skip_count)
+ GTEST_NO_INLINE_ GTEST_NO_TAIL_CALL_;
+
+ // Finds and returns a TestSuite with the given name. If one doesn't
+ // exist, creates one and returns it.
+ //
+ // Arguments:
+ //
+ // test_suite_name: name of the test suite
+ // type_param: the name of the test's type parameter, or NULL if
+ // this is not a typed or a type-parameterized test.
+ // set_up_tc: pointer to the function that sets up the test suite
+ // tear_down_tc: pointer to the function that tears down the test suite
+ TestSuite* GetTestSuite(const char* test_suite_name, const char* type_param,
+ internal::SetUpTestSuiteFunc set_up_tc,
+ internal::TearDownTestSuiteFunc tear_down_tc);
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ TestCase* GetTestCase(const char* test_case_name, const char* type_param,
+ internal::SetUpTestSuiteFunc set_up_tc,
+ internal::TearDownTestSuiteFunc tear_down_tc) {
+ return GetTestSuite(test_case_name, type_param, set_up_tc, tear_down_tc);
+ }
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ // Adds a TestInfo to the unit test.
+ //
+ // Arguments:
+ //
+ // set_up_tc: pointer to the function that sets up the test suite
+ // tear_down_tc: pointer to the function that tears down the test suite
+ // test_info: the TestInfo object
+ void AddTestInfo(internal::SetUpTestSuiteFunc set_up_tc,
+ internal::TearDownTestSuiteFunc tear_down_tc,
+ TestInfo* test_info) {
+#if GTEST_HAS_DEATH_TEST
+ // In order to support thread-safe death tests, we need to
+ // remember the original working directory when the test program
+ // was first invoked. We cannot do this in RUN_ALL_TESTS(), as
+ // the user may have changed the current directory before calling
+ // RUN_ALL_TESTS(). Therefore we capture the current directory in
+ // AddTestInfo(), which is called to register a TEST or TEST_F
+ // before main() is reached.
+ if (original_working_dir_.IsEmpty()) {
+ original_working_dir_.Set(FilePath::GetCurrentDir());
+ GTEST_CHECK_(!original_working_dir_.IsEmpty())
+ << "Failed to get the current working directory.";
+ }
+#endif // GTEST_HAS_DEATH_TEST
+
+ GetTestSuite(test_info->test_suite_name(), test_info->type_param(),
+ set_up_tc, tear_down_tc)
+ ->AddTestInfo(test_info);
+ }
+
+ // Returns ParameterizedTestSuiteRegistry object used to keep track of
+ // value-parameterized tests and instantiate and register them.
+ internal::ParameterizedTestSuiteRegistry& parameterized_test_registry() {
+ return parameterized_test_registry_;
+ }
+
+ std::set<std::string>* ignored_parameterized_test_suites() {
+ return &ignored_parameterized_test_suites_;
+ }
+
+ // Returns TypeParameterizedTestSuiteRegistry object used to keep track of
+ // type-parameterized tests and instantiations of them.
+ internal::TypeParameterizedTestSuiteRegistry&
+ type_parameterized_test_registry() {
+ return type_parameterized_test_registry_;
+ }
+
+ // Sets the TestSuite object for the test that's currently running.
+ void set_current_test_suite(TestSuite* a_current_test_suite) {
+ current_test_suite_ = a_current_test_suite;
+ }
+
+ // Sets the TestInfo object for the test that's currently running. If
+ // current_test_info is NULL, the assertion results will be stored in
+ // ad_hoc_test_result_.
+ void set_current_test_info(TestInfo* a_current_test_info) {
+ current_test_info_ = a_current_test_info;
+ }
+
+ // Registers all parameterized tests defined using TEST_P and
+ // INSTANTIATE_TEST_SUITE_P, creating regular tests for each test/parameter
+ // combination. This method can be called more then once; it has guards
+ // protecting from registering the tests more then once. If
+ // value-parameterized tests are disabled, RegisterParameterizedTests is
+ // present but does nothing.
+ void RegisterParameterizedTests();
+
+ // Runs all tests in this UnitTest object, prints the result, and
+ // returns true if all tests are successful. If any exception is
+ // thrown during a test, this test is considered to be failed, but
+ // the rest of the tests will still be run.
+ bool RunAllTests();
+
+ // Clears the results of all tests, except the ad hoc tests.
+ void ClearNonAdHocTestResult() {
+ ForEach(test_suites_, TestSuite::ClearTestSuiteResult);
+ }
+
+ // Clears the results of ad-hoc test assertions.
+ void ClearAdHocTestResult() { ad_hoc_test_result_.Clear(); }
+
+ // Adds a TestProperty to the current TestResult object when invoked in a
+ // context of a test or a test suite, or to the global property set. If the
+ // result already contains a property with the same key, the value will be
+ // updated.
+ void RecordProperty(const TestProperty& test_property);
+
+ enum ReactionToSharding { HONOR_SHARDING_PROTOCOL, IGNORE_SHARDING_PROTOCOL };
+
+ // Matches the full name of each test against the user-specified
+ // filter to decide whether the test should run, then records the
+ // result in each TestSuite and TestInfo object.
+ // If shard_tests == HONOR_SHARDING_PROTOCOL, further filters tests
+ // based on sharding variables in the environment.
+ // Returns the number of tests that should run.
+ int FilterTests(ReactionToSharding shard_tests);
+
+ // Prints the names of the tests matching the user-specified filter flag.
+ void ListTestsMatchingFilter();
+
+ const TestSuite* current_test_suite() const { return current_test_suite_; }
+ TestInfo* current_test_info() { return current_test_info_; }
+ const TestInfo* current_test_info() const { return current_test_info_; }
+
+ // Returns the vector of environments that need to be set-up/torn-down
+ // before/after the tests are run.
+ std::vector<Environment*>& environments() { return environments_; }
+
+ // Getters for the per-thread Google Test trace stack.
+ std::vector<TraceInfo>& gtest_trace_stack() {
+ return *(gtest_trace_stack_.pointer());
+ }
+ const std::vector<TraceInfo>& gtest_trace_stack() const {
+ return gtest_trace_stack_.get();
+ }
+
+#if GTEST_HAS_DEATH_TEST
+ void InitDeathTestSubprocessControlInfo() {
+ internal_run_death_test_flag_.reset(ParseInternalRunDeathTestFlag());
+ }
+ // Returns a pointer to the parsed --gtest_internal_run_death_test
+ // flag, or NULL if that flag was not specified.
+ // This information is useful only in a death test child process.
+ // Must not be called before a call to InitGoogleTest.
+ const InternalRunDeathTestFlag* internal_run_death_test_flag() const {
+ return internal_run_death_test_flag_.get();
+ }
+
+ // Returns a pointer to the current death test factory.
+ internal::DeathTestFactory* death_test_factory() {
+ return death_test_factory_.get();
+ }
+
+ void SuppressTestEventsIfInSubprocess();
+
+ friend class ReplaceDeathTestFactory;
+#endif // GTEST_HAS_DEATH_TEST
+
+ // Initializes the event listener performing XML output as specified by
+ // UnitTestOptions. Must not be called before InitGoogleTest.
+ void ConfigureXmlOutput();
+
+#if GTEST_CAN_STREAM_RESULTS_
+ // Initializes the event listener for streaming test results to a socket.
+ // Must not be called before InitGoogleTest.
+ void ConfigureStreamingOutput();
+#endif
+
+ // Performs initialization dependent upon flag values obtained in
+ // ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to
+ // ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest
+ // this function is also called from RunAllTests. Since this function can be
+ // called more than once, it has to be idempotent.
+ void PostFlagParsingInit();
+
+ // Gets the random seed used at the start of the current test iteration.
+ int random_seed() const { return random_seed_; }
+
+ // Gets the random number generator.
+ internal::Random* random() { return &random_; }
+
+ // Shuffles all test suites, and the tests within each test suite,
+ // making sure that death tests are still run first.
+ void ShuffleTests();
+
+ // Restores the test suites and tests to their order before the first shuffle.
+ void UnshuffleTests();
+
+ // Returns the value of GTEST_FLAG(catch_exceptions) at the moment
+ // UnitTest::Run() starts.
+ bool catch_exceptions() const { return catch_exceptions_; }
+
+ private:
+ friend class ::testing::UnitTest;
+
+ // Used by UnitTest::Run() to capture the state of
+ // GTEST_FLAG(catch_exceptions) at the moment it starts.
+ void set_catch_exceptions(bool value) { catch_exceptions_ = value; }
+
+ // The UnitTest object that owns this implementation object.
+ UnitTest* const parent_;
+
+ // The working directory when the first TEST() or TEST_F() was
+ // executed.
+ internal::FilePath original_working_dir_;
+
+ // The default test part result reporters.
+ DefaultGlobalTestPartResultReporter default_global_test_part_result_reporter_;
+ DefaultPerThreadTestPartResultReporter
+ default_per_thread_test_part_result_reporter_;
+
+ // Points to (but doesn't own) the global test part result reporter.
+ TestPartResultReporterInterface* global_test_part_result_repoter_;
+
+ // Protects read and write access to global_test_part_result_reporter_.
+ internal::Mutex global_test_part_result_reporter_mutex_;
+
+ // Points to (but doesn't own) the per-thread test part result reporter.
+ internal::ThreadLocal<TestPartResultReporterInterface*>
+ per_thread_test_part_result_reporter_;
+
+ // The vector of environments that need to be set-up/torn-down
+ // before/after the tests are run.
+ std::vector<Environment*> environments_;
+
+ // The vector of TestSuites in their original order. It owns the
+ // elements in the vector.
+ std::vector<TestSuite*> test_suites_;
+
+ // Provides a level of indirection for the test suite list to allow
+ // easy shuffling and restoring the test suite order. The i-th
+ // element of this vector is the index of the i-th test suite in the
+ // shuffled order.
+ std::vector<int> test_suite_indices_;
+
+ // ParameterizedTestRegistry object used to register value-parameterized
+ // tests.
+ internal::ParameterizedTestSuiteRegistry parameterized_test_registry_;
+ internal::TypeParameterizedTestSuiteRegistry
+ type_parameterized_test_registry_;
+
+ // The set holding the name of parameterized
+ // test suites that may go uninstantiated.
+ std::set<std::string> ignored_parameterized_test_suites_;
+
+ // Indicates whether RegisterParameterizedTests() has been called already.
+ bool parameterized_tests_registered_;
+
+ // Index of the last death test suite registered. Initially -1.
+ int last_death_test_suite_;
+
+ // This points to the TestSuite for the currently running test. It
+ // changes as Google Test goes through one test suite after another.
+ // When no test is running, this is set to NULL and Google Test
+ // stores assertion results in ad_hoc_test_result_. Initially NULL.
+ TestSuite* current_test_suite_;
+
+ // This points to the TestInfo for the currently running test. It
+ // changes as Google Test goes through one test after another. When
+ // no test is running, this is set to NULL and Google Test stores
+ // assertion results in ad_hoc_test_result_. Initially NULL.
+ TestInfo* current_test_info_;
+
+ // Normally, a user only writes assertions inside a TEST or TEST_F,
+ // or inside a function called by a TEST or TEST_F. Since Google
+ // Test keeps track of which test is current running, it can
+ // associate such an assertion with the test it belongs to.
+ //
+ // If an assertion is encountered when no TEST or TEST_F is running,
+ // Google Test attributes the assertion result to an imaginary "ad hoc"
+ // test, and records the result in ad_hoc_test_result_.
+ TestResult ad_hoc_test_result_;
+
+ // The list of event listeners that can be used to track events inside
+ // Google Test.
+ TestEventListeners listeners_;
+
+ // The OS stack trace getter. Will be deleted when the UnitTest
+ // object is destructed. By default, an OsStackTraceGetter is used,
+ // but the user can set this field to use a custom getter if that is
+ // desired.
+ OsStackTraceGetterInterface* os_stack_trace_getter_;
+
+ // True if and only if PostFlagParsingInit() has been called.
+ bool post_flag_parse_init_performed_;
+
+ // The random number seed used at the beginning of the test run.
+ int random_seed_;
+
+ // Our random number generator.
+ internal::Random random_;
+
+ // The time of the test program start, in ms from the start of the
+ // UNIX epoch.
+ TimeInMillis start_timestamp_;
+
+ // How long the test took to run, in milliseconds.
+ TimeInMillis elapsed_time_;
+
+#if GTEST_HAS_DEATH_TEST
+ // The decomposed components of the gtest_internal_run_death_test flag,
+ // parsed when RUN_ALL_TESTS is called.
+ std::unique_ptr<InternalRunDeathTestFlag> internal_run_death_test_flag_;
+ std::unique_ptr<internal::DeathTestFactory> death_test_factory_;
+#endif // GTEST_HAS_DEATH_TEST
+
+ // A per-thread stack of traces created by the SCOPED_TRACE() macro.
+ internal::ThreadLocal<std::vector<TraceInfo> > gtest_trace_stack_;
+
+ // The value of GTEST_FLAG(catch_exceptions) at the moment RunAllTests()
+ // starts.
+ bool catch_exceptions_;
+
+ UnitTestImpl(const UnitTestImpl&) = delete;
+ UnitTestImpl& operator=(const UnitTestImpl&) = delete;
+}; // class UnitTestImpl
+
+// Convenience function for accessing the global UnitTest
+// implementation object.
+inline UnitTestImpl* GetUnitTestImpl() {
+ return UnitTest::GetInstance()->impl();
+}
+
+#if GTEST_USES_SIMPLE_RE
+
+// Internal helper functions for implementing the simple regular
+// expression matcher.
+GTEST_API_ bool IsInSet(char ch, const char* str);
+GTEST_API_ bool IsAsciiDigit(char ch);
+GTEST_API_ bool IsAsciiPunct(char ch);
+GTEST_API_ bool IsRepeat(char ch);
+GTEST_API_ bool IsAsciiWhiteSpace(char ch);
+GTEST_API_ bool IsAsciiWordChar(char ch);
+GTEST_API_ bool IsValidEscape(char ch);
+GTEST_API_ bool AtomMatchesChar(bool escaped, char pattern, char ch);
+GTEST_API_ bool ValidateRegex(const char* regex);
+GTEST_API_ bool MatchRegexAtHead(const char* regex, const char* str);
+GTEST_API_ bool MatchRepetitionAndRegexAtHead(bool escaped, char ch,
+ char repeat, const char* regex,
+ const char* str);
+GTEST_API_ bool MatchRegexAnywhere(const char* regex, const char* str);
+
+#endif // GTEST_USES_SIMPLE_RE
+
+// Parses the command line for Google Test flags, without initializing
+// other parts of Google Test.
+GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, char** argv);
+GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv);
+
+#if GTEST_HAS_DEATH_TEST
+
+// Returns the message describing the last system error, regardless of the
+// platform.
+GTEST_API_ std::string GetLastErrnoDescription();
+
+// Attempts to parse a string into a positive integer pointed to by the
+// number parameter. Returns true if that is possible.
+// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use
+// it here.
+template <typename Integer>
+bool ParseNaturalNumber(const ::std::string& str, Integer* number) {
+ // Fail fast if the given string does not begin with a digit;
+ // this bypasses strtoXXX's "optional leading whitespace and plus
+ // or minus sign" semantics, which are undesirable here.
+ if (str.empty() || !IsDigit(str[0])) {
+ return false;
+ }
+ errno = 0;
+
+ char* end;
+ // BiggestConvertible is the largest integer type that system-provided
+ // string-to-number conversion routines can return.
+ using BiggestConvertible = unsigned long long; // NOLINT
+
+ const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10); // NOLINT
+ const bool parse_success = *end == '\0' && errno == 0;
+
+ GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed));
+
+ const Integer result = static_cast<Integer>(parsed);
+ if (parse_success && static_cast<BiggestConvertible>(result) == parsed) {
+ *number = result;
+ return true;
+ }
+ return false;
+}
+#endif // GTEST_HAS_DEATH_TEST
+
+// TestResult contains some private methods that should be hidden from
+// Google Test user but are required for testing. This class allow our tests
+// to access them.
+//
+// This class is supplied only for the purpose of testing Google Test's own
+// constructs. Do not use it in user tests, either directly or indirectly.
+class TestResultAccessor {
+ public:
+ static void RecordProperty(TestResult* test_result,
+ const std::string& xml_element,
+ const TestProperty& property) {
+ test_result->RecordProperty(xml_element, property);
+ }
+
+ static void ClearTestPartResults(TestResult* test_result) {
+ test_result->ClearTestPartResults();
+ }
+
+ static const std::vector<testing::TestPartResult>& test_part_results(
+ const TestResult& test_result) {
+ return test_result.test_part_results();
+ }
+};
+
+#if GTEST_CAN_STREAM_RESULTS_
+
+// Streams test results to the given port on the given host machine.
+class StreamingListener : public EmptyTestEventListener {
+ public:
+ // Abstract base class for writing strings to a socket.
+ class AbstractSocketWriter {
+ public:
+ virtual ~AbstractSocketWriter() {}
+
+ // Sends a string to the socket.
+ virtual void Send(const std::string& message) = 0;
+
+ // Closes the socket.
+ virtual void CloseConnection() {}
+
+ // Sends a string and a newline to the socket.
+ void SendLn(const std::string& message) { Send(message + "\n"); }
+ };
+
+ // Concrete class for actually writing strings to a socket.
+ class SocketWriter : public AbstractSocketWriter {
+ public:
+ SocketWriter(const std::string& host, const std::string& port)
+ : sockfd_(-1), host_name_(host), port_num_(port) {
+ MakeConnection();
+ }
+
+ ~SocketWriter() override {
+ if (sockfd_ != -1) CloseConnection();
+ }
+
+ // Sends a string to the socket.
+ void Send(const std::string& message) override {
+ GTEST_CHECK_(sockfd_ != -1)
+ << "Send() can be called only when there is a connection.";
+
+ const auto len = static_cast<size_t>(message.length());
+ if (write(sockfd_, message.c_str(), len) != static_cast<ssize_t>(len)) {
+ GTEST_LOG_(WARNING) << "stream_result_to: failed to stream to "
+ << host_name_ << ":" << port_num_;
+ }
+ }
+
+ private:
+ // Creates a client socket and connects to the server.
+ void MakeConnection();
+
+ // Closes the socket.
+ void CloseConnection() override {
+ GTEST_CHECK_(sockfd_ != -1)
+ << "CloseConnection() can be called only when there is a connection.";
+
+ close(sockfd_);
+ sockfd_ = -1;
+ }
+
+ int sockfd_; // socket file descriptor
+ const std::string host_name_;
+ const std::string port_num_;
+
+ SocketWriter(const SocketWriter&) = delete;
+ SocketWriter& operator=(const SocketWriter&) = delete;
+ }; // class SocketWriter
+
+ // Escapes '=', '&', '%', and '\n' characters in str as "%xx".
+ static std::string UrlEncode(const char* str);
+
+ StreamingListener(const std::string& host, const std::string& port)
+ : socket_writer_(new SocketWriter(host, port)) {
+ Start();
+ }
+
+ explicit StreamingListener(AbstractSocketWriter* socket_writer)
+ : socket_writer_(socket_writer) {
+ Start();
+ }
+
+ void OnTestProgramStart(const UnitTest& /* unit_test */) override {
+ SendLn("event=TestProgramStart");
+ }
+
+ void OnTestProgramEnd(const UnitTest& unit_test) override {
+ // Note that Google Test current only report elapsed time for each
+ // test iteration, not for the entire test program.
+ SendLn("event=TestProgramEnd&passed=" + FormatBool(unit_test.Passed()));
+
+ // Notify the streaming server to stop.
+ socket_writer_->CloseConnection();
+ }
+
+ void OnTestIterationStart(const UnitTest& /* unit_test */,
+ int iteration) override {
+ SendLn("event=TestIterationStart&iteration=" +
+ StreamableToString(iteration));
+ }
+
+ void OnTestIterationEnd(const UnitTest& unit_test,
+ int /* iteration */) override {
+ SendLn("event=TestIterationEnd&passed=" + FormatBool(unit_test.Passed()) +
+ "&elapsed_time=" + StreamableToString(unit_test.elapsed_time()) +
+ "ms");
+ }
+
+ // Note that "event=TestCaseStart" is a wire format and has to remain
+ // "case" for compatibility
+ void OnTestSuiteStart(const TestSuite& test_suite) override {
+ SendLn(std::string("event=TestCaseStart&name=") + test_suite.name());
+ }
+
+ // Note that "event=TestCaseEnd" is a wire format and has to remain
+ // "case" for compatibility
+ void OnTestSuiteEnd(const TestSuite& test_suite) override {
+ SendLn("event=TestCaseEnd&passed=" + FormatBool(test_suite.Passed()) +
+ "&elapsed_time=" + StreamableToString(test_suite.elapsed_time()) +
+ "ms");
+ }
+
+ void OnTestStart(const TestInfo& test_info) override {
+ SendLn(std::string("event=TestStart&name=") + test_info.name());
+ }
+
+ void OnTestEnd(const TestInfo& test_info) override {
+ SendLn("event=TestEnd&passed=" +
+ FormatBool((test_info.result())->Passed()) + "&elapsed_time=" +
+ StreamableToString((test_info.result())->elapsed_time()) + "ms");
+ }
+
+ void OnTestPartResult(const TestPartResult& test_part_result) override {
+ const char* file_name = test_part_result.file_name();
+ if (file_name == nullptr) file_name = "";
+ SendLn("event=TestPartResult&file=" + UrlEncode(file_name) +
+ "&line=" + StreamableToString(test_part_result.line_number()) +
+ "&message=" + UrlEncode(test_part_result.message()));
+ }
+
+ private:
+ // Sends the given message and a newline to the socket.
+ void SendLn(const std::string& message) { socket_writer_->SendLn(message); }
+
+ // Called at the start of streaming to notify the receiver what
+ // protocol we are using.
+ void Start() { SendLn("gtest_streaming_protocol_version=1.0"); }
+
+ std::string FormatBool(bool value) { return value ? "1" : "0"; }
+
+ const std::unique_ptr<AbstractSocketWriter> socket_writer_;
+
+ StreamingListener(const StreamingListener&) = delete;
+ StreamingListener& operator=(const StreamingListener&) = delete;
+}; // class StreamingListener
+
+#endif // GTEST_CAN_STREAM_RESULTS_
+
+} // namespace internal
+} // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+
+#endif // GOOGLETEST_SRC_GTEST_INTERNAL_INL_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-matchers.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-matchers.cc
new file mode 100644
index 0000000000..7e3bcc0cff
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-matchers.cc
@@ -0,0 +1,98 @@
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This file implements just enough of the matcher interface to allow
+// EXPECT_DEATH and friends to accept a matcher argument.
+
+#include "gtest/gtest-matchers.h"
+
+#include <string>
+
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+
+// Constructs a matcher that matches a const std::string& whose value is
+// equal to s.
+Matcher<const std::string&>::Matcher(const std::string& s) { *this = Eq(s); }
+
+// Constructs a matcher that matches a const std::string& whose value is
+// equal to s.
+Matcher<const std::string&>::Matcher(const char* s) {
+ *this = Eq(std::string(s));
+}
+
+// Constructs a matcher that matches a std::string whose value is equal to
+// s.
+Matcher<std::string>::Matcher(const std::string& s) { *this = Eq(s); }
+
+// Constructs a matcher that matches a std::string whose value is equal to
+// s.
+Matcher<std::string>::Matcher(const char* s) { *this = Eq(std::string(s)); }
+
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+// Constructs a matcher that matches a const StringView& whose value is
+// equal to s.
+Matcher<const internal::StringView&>::Matcher(const std::string& s) {
+ *this = Eq(s);
+}
+
+// Constructs a matcher that matches a const StringView& whose value is
+// equal to s.
+Matcher<const internal::StringView&>::Matcher(const char* s) {
+ *this = Eq(std::string(s));
+}
+
+// Constructs a matcher that matches a const StringView& whose value is
+// equal to s.
+Matcher<const internal::StringView&>::Matcher(internal::StringView s) {
+ *this = Eq(std::string(s));
+}
+
+// Constructs a matcher that matches a StringView whose value is equal to
+// s.
+Matcher<internal::StringView>::Matcher(const std::string& s) { *this = Eq(s); }
+
+// Constructs a matcher that matches a StringView whose value is equal to
+// s.
+Matcher<internal::StringView>::Matcher(const char* s) {
+ *this = Eq(std::string(s));
+}
+
+// Constructs a matcher that matches a StringView whose value is equal to
+// s.
+Matcher<internal::StringView>::Matcher(internal::StringView s) {
+ *this = Eq(std::string(s));
+}
+#endif // GTEST_INTERNAL_HAS_STRING_VIEW
+
+} // namespace testing
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-port.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-port.cc
new file mode 100644
index 0000000000..d797fe4d58
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-port.cc
@@ -0,0 +1,1394 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "gtest/internal/gtest-port.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cstdint>
+#include <fstream>
+#include <memory>
+
+#if GTEST_OS_WINDOWS
+#include <io.h>
+#include <sys/stat.h>
+#include <windows.h>
+
+#include <map> // Used in ThreadLocal.
+#ifdef _MSC_VER
+#include <crtdbg.h>
+#endif // _MSC_VER
+#else
+#include <unistd.h>
+#endif // GTEST_OS_WINDOWS
+
+#if GTEST_OS_MAC
+#include <mach/mach_init.h>
+#include <mach/task.h>
+#include <mach/vm_map.h>
+#endif // GTEST_OS_MAC
+
+#if GTEST_OS_DRAGONFLY || GTEST_OS_FREEBSD || GTEST_OS_GNU_KFREEBSD || \
+ GTEST_OS_NETBSD || GTEST_OS_OPENBSD
+#include <sys/sysctl.h>
+#if GTEST_OS_DRAGONFLY || GTEST_OS_FREEBSD || GTEST_OS_GNU_KFREEBSD
+#include <sys/user.h>
+#endif
+#endif
+
+#if GTEST_OS_QNX
+#include <devctl.h>
+#include <fcntl.h>
+#include <sys/procfs.h>
+#endif // GTEST_OS_QNX
+
+#if GTEST_OS_AIX
+#include <procinfo.h>
+#include <sys/types.h>
+#endif // GTEST_OS_AIX
+
+#if GTEST_OS_FUCHSIA
+#include <zircon/process.h>
+#include <zircon/syscalls.h>
+#endif // GTEST_OS_FUCHSIA
+
+#include "gtest/gtest-message.h"
+#include "gtest/gtest-spi.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-string.h"
+#include "src/gtest-internal-inl.h"
+
+namespace testing {
+namespace internal {
+
+#if GTEST_OS_LINUX || GTEST_OS_GNU_HURD
+
+namespace {
+template <typename T>
+T ReadProcFileField(const std::string& filename, int field) {
+ std::string dummy;
+ std::ifstream file(filename.c_str());
+ while (field-- > 0) {
+ file >> dummy;
+ }
+ T output = 0;
+ file >> output;
+ return output;
+}
+} // namespace
+
+// Returns the number of active threads, or 0 when there is an error.
+size_t GetThreadCount() {
+ const std::string filename =
+ (Message() << "/proc/" << getpid() << "/stat").GetString();
+ return ReadProcFileField<size_t>(filename, 19);
+}
+
+#elif GTEST_OS_MAC
+
+size_t GetThreadCount() {
+ const task_t task = mach_task_self();
+ mach_msg_type_number_t thread_count;
+ thread_act_array_t thread_list;
+ const kern_return_t status = task_threads(task, &thread_list, &thread_count);
+ if (status == KERN_SUCCESS) {
+ // task_threads allocates resources in thread_list and we need to free them
+ // to avoid leaks.
+ vm_deallocate(task, reinterpret_cast<vm_address_t>(thread_list),
+ sizeof(thread_t) * thread_count);
+ return static_cast<size_t>(thread_count);
+ } else {
+ return 0;
+ }
+}
+
+#elif GTEST_OS_DRAGONFLY || GTEST_OS_FREEBSD || GTEST_OS_GNU_KFREEBSD || \
+ GTEST_OS_NETBSD
+
+#if GTEST_OS_NETBSD
+#undef KERN_PROC
+#define KERN_PROC KERN_PROC2
+#define kinfo_proc kinfo_proc2
+#endif
+
+#if GTEST_OS_DRAGONFLY
+#define KP_NLWP(kp) (kp.kp_nthreads)
+#elif GTEST_OS_FREEBSD || GTEST_OS_GNU_KFREEBSD
+#define KP_NLWP(kp) (kp.ki_numthreads)
+#elif GTEST_OS_NETBSD
+#define KP_NLWP(kp) (kp.p_nlwps)
+#endif
+
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+size_t GetThreadCount() {
+ int mib[] = {
+ CTL_KERN,
+ KERN_PROC,
+ KERN_PROC_PID,
+ getpid(),
+#if GTEST_OS_NETBSD
+ sizeof(struct kinfo_proc),
+ 1,
+#endif
+ };
+ u_int miblen = sizeof(mib) / sizeof(mib[0]);
+ struct kinfo_proc info;
+ size_t size = sizeof(info);
+ if (sysctl(mib, miblen, &info, &size, NULL, 0)) {
+ return 0;
+ }
+ return static_cast<size_t>(KP_NLWP(info));
+}
+#elif GTEST_OS_OPENBSD
+
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+size_t GetThreadCount() {
+ int mib[] = {
+ CTL_KERN,
+ KERN_PROC,
+ KERN_PROC_PID | KERN_PROC_SHOW_THREADS,
+ getpid(),
+ sizeof(struct kinfo_proc),
+ 0,
+ };
+ u_int miblen = sizeof(mib) / sizeof(mib[0]);
+
+ // get number of structs
+ size_t size;
+ if (sysctl(mib, miblen, NULL, &size, NULL, 0)) {
+ return 0;
+ }
+
+ mib[5] = static_cast<int>(size / static_cast<size_t>(mib[4]));
+
+ // populate array of structs
+ struct kinfo_proc info[mib[5]];
+ if (sysctl(mib, miblen, &info, &size, NULL, 0)) {
+ return 0;
+ }
+
+ // exclude empty members
+ size_t nthreads = 0;
+ for (size_t i = 0; i < size / static_cast<size_t>(mib[4]); i++) {
+ if (info[i].p_tid != -1) nthreads++;
+ }
+ return nthreads;
+}
+
+#elif GTEST_OS_QNX
+
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+size_t GetThreadCount() {
+ const int fd = open("/proc/self/as", O_RDONLY);
+ if (fd < 0) {
+ return 0;
+ }
+ procfs_info process_info;
+ const int status =
+ devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), nullptr);
+ close(fd);
+ if (status == EOK) {
+ return static_cast<size_t>(process_info.num_threads);
+ } else {
+ return 0;
+ }
+}
+
+#elif GTEST_OS_AIX
+
+size_t GetThreadCount() {
+ struct procentry64 entry;
+ pid_t pid = getpid();
+ int status = getprocs64(&entry, sizeof(entry), nullptr, 0, &pid, 1);
+ if (status == 1) {
+ return entry.pi_thcount;
+ } else {
+ return 0;
+ }
+}
+
+#elif GTEST_OS_FUCHSIA
+
+size_t GetThreadCount() {
+ int dummy_buffer;
+ size_t avail;
+ zx_status_t status =
+ zx_object_get_info(zx_process_self(), ZX_INFO_PROCESS_THREADS,
+ &dummy_buffer, 0, nullptr, &avail);
+ if (status == ZX_OK) {
+ return avail;
+ } else {
+ return 0;
+ }
+}
+
+#else
+
+size_t GetThreadCount() {
+ // There's no portable way to detect the number of threads, so we just
+ // return 0 to indicate that we cannot detect it.
+ return 0;
+}
+
+#endif // GTEST_OS_LINUX
+
+#if GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS
+
+AutoHandle::AutoHandle() : handle_(INVALID_HANDLE_VALUE) {}
+
+AutoHandle::AutoHandle(Handle handle) : handle_(handle) {}
+
+AutoHandle::~AutoHandle() { Reset(); }
+
+AutoHandle::Handle AutoHandle::Get() const { return handle_; }
+
+void AutoHandle::Reset() { Reset(INVALID_HANDLE_VALUE); }
+
+void AutoHandle::Reset(HANDLE handle) {
+ // Resetting with the same handle we already own is invalid.
+ if (handle_ != handle) {
+ if (IsCloseable()) {
+ ::CloseHandle(handle_);
+ }
+ handle_ = handle;
+ } else {
+ GTEST_CHECK_(!IsCloseable())
+ << "Resetting a valid handle to itself is likely a programmer error "
+ "and thus not allowed.";
+ }
+}
+
+bool AutoHandle::IsCloseable() const {
+ // Different Windows APIs may use either of these values to represent an
+ // invalid handle.
+ return handle_ != nullptr && handle_ != INVALID_HANDLE_VALUE;
+}
+
+Mutex::Mutex()
+ : owner_thread_id_(0),
+ type_(kDynamic),
+ critical_section_init_phase_(0),
+ critical_section_(new CRITICAL_SECTION) {
+ ::InitializeCriticalSection(critical_section_);
+}
+
+Mutex::~Mutex() {
+ // Static mutexes are leaked intentionally. It is not thread-safe to try
+ // to clean them up.
+ if (type_ == kDynamic) {
+ ::DeleteCriticalSection(critical_section_);
+ delete critical_section_;
+ critical_section_ = nullptr;
+ }
+}
+
+void Mutex::Lock() {
+ ThreadSafeLazyInit();
+ ::EnterCriticalSection(critical_section_);
+ owner_thread_id_ = ::GetCurrentThreadId();
+}
+
+void Mutex::Unlock() {
+ ThreadSafeLazyInit();
+ // We don't protect writing to owner_thread_id_ here, as it's the
+ // caller's responsibility to ensure that the current thread holds the
+ // mutex when this is called.
+ owner_thread_id_ = 0;
+ ::LeaveCriticalSection(critical_section_);
+}
+
+// Does nothing if the current thread holds the mutex. Otherwise, crashes
+// with high probability.
+void Mutex::AssertHeld() {
+ ThreadSafeLazyInit();
+ GTEST_CHECK_(owner_thread_id_ == ::GetCurrentThreadId())
+ << "The current thread is not holding the mutex @" << this;
+}
+
+namespace {
+
+#ifdef _MSC_VER
+// Use the RAII idiom to flag mem allocs that are intentionally never
+// deallocated. The motivation is to silence the false positive mem leaks
+// that are reported by the debug version of MS's CRT which can only detect
+// if an alloc is missing a matching deallocation.
+// Example:
+// MemoryIsNotDeallocated memory_is_not_deallocated;
+// critical_section_ = new CRITICAL_SECTION;
+//
+class MemoryIsNotDeallocated {
+ public:
+ MemoryIsNotDeallocated() : old_crtdbg_flag_(0) {
+ old_crtdbg_flag_ = _CrtSetDbgFlag(_CRTDBG_REPORT_FLAG);
+ // Set heap allocation block type to _IGNORE_BLOCK so that MS debug CRT
+ // doesn't report mem leak if there's no matching deallocation.
+ (void)_CrtSetDbgFlag(old_crtdbg_flag_ & ~_CRTDBG_ALLOC_MEM_DF);
+ }
+
+ ~MemoryIsNotDeallocated() {
+ // Restore the original _CRTDBG_ALLOC_MEM_DF flag
+ (void)_CrtSetDbgFlag(old_crtdbg_flag_);
+ }
+
+ private:
+ int old_crtdbg_flag_;
+
+ MemoryIsNotDeallocated(const MemoryIsNotDeallocated&) = delete;
+ MemoryIsNotDeallocated& operator=(const MemoryIsNotDeallocated&) = delete;
+};
+#endif // _MSC_VER
+
+} // namespace
+
+// Initializes owner_thread_id_ and critical_section_ in static mutexes.
+void Mutex::ThreadSafeLazyInit() {
+ // Dynamic mutexes are initialized in the constructor.
+ if (type_ == kStatic) {
+ switch (
+ ::InterlockedCompareExchange(&critical_section_init_phase_, 1L, 0L)) {
+ case 0:
+ // If critical_section_init_phase_ was 0 before the exchange, we
+ // are the first to test it and need to perform the initialization.
+ owner_thread_id_ = 0;
+ {
+ // Use RAII to flag that following mem alloc is never deallocated.
+#ifdef _MSC_VER
+ MemoryIsNotDeallocated memory_is_not_deallocated;
+#endif // _MSC_VER
+ critical_section_ = new CRITICAL_SECTION;
+ }
+ ::InitializeCriticalSection(critical_section_);
+ // Updates the critical_section_init_phase_ to 2 to signal
+ // initialization complete.
+ GTEST_CHECK_(::InterlockedCompareExchange(&critical_section_init_phase_,
+ 2L, 1L) == 1L);
+ break;
+ case 1:
+ // Somebody else is already initializing the mutex; spin until they
+ // are done.
+ while (::InterlockedCompareExchange(&critical_section_init_phase_, 2L,
+ 2L) != 2L) {
+ // Possibly yields the rest of the thread's time slice to other
+ // threads.
+ ::Sleep(0);
+ }
+ break;
+
+ case 2:
+ break; // The mutex is already initialized and ready for use.
+
+ default:
+ GTEST_CHECK_(false)
+ << "Unexpected value of critical_section_init_phase_ "
+ << "while initializing a static mutex.";
+ }
+ }
+}
+
+namespace {
+
+class ThreadWithParamSupport : public ThreadWithParamBase {
+ public:
+ static HANDLE CreateThread(Runnable* runnable,
+ Notification* thread_can_start) {
+ ThreadMainParam* param = new ThreadMainParam(runnable, thread_can_start);
+ DWORD thread_id;
+ HANDLE thread_handle = ::CreateThread(
+ nullptr, // Default security.
+ 0, // Default stack size.
+ &ThreadWithParamSupport::ThreadMain,
+ param, // Parameter to ThreadMainStatic
+ 0x0, // Default creation flags.
+ &thread_id); // Need a valid pointer for the call to work under Win98.
+ GTEST_CHECK_(thread_handle != nullptr)
+ << "CreateThread failed with error " << ::GetLastError() << ".";
+ if (thread_handle == nullptr) {
+ delete param;
+ }
+ return thread_handle;
+ }
+
+ private:
+ struct ThreadMainParam {
+ ThreadMainParam(Runnable* runnable, Notification* thread_can_start)
+ : runnable_(runnable), thread_can_start_(thread_can_start) {}
+ std::unique_ptr<Runnable> runnable_;
+ // Does not own.
+ Notification* thread_can_start_;
+ };
+
+ static DWORD WINAPI ThreadMain(void* ptr) {
+ // Transfers ownership.
+ std::unique_ptr<ThreadMainParam> param(static_cast<ThreadMainParam*>(ptr));
+ if (param->thread_can_start_ != nullptr)
+ param->thread_can_start_->WaitForNotification();
+ param->runnable_->Run();
+ return 0;
+ }
+
+ // Prohibit instantiation.
+ ThreadWithParamSupport();
+
+ ThreadWithParamSupport(const ThreadWithParamSupport&) = delete;
+ ThreadWithParamSupport& operator=(const ThreadWithParamSupport&) = delete;
+};
+
+} // namespace
+
+ThreadWithParamBase::ThreadWithParamBase(Runnable* runnable,
+ Notification* thread_can_start)
+ : thread_(
+ ThreadWithParamSupport::CreateThread(runnable, thread_can_start)) {}
+
+ThreadWithParamBase::~ThreadWithParamBase() { Join(); }
+
+void ThreadWithParamBase::Join() {
+ GTEST_CHECK_(::WaitForSingleObject(thread_.Get(), INFINITE) == WAIT_OBJECT_0)
+ << "Failed to join the thread with error " << ::GetLastError() << ".";
+}
+
+// Maps a thread to a set of ThreadIdToThreadLocals that have values
+// instantiated on that thread and notifies them when the thread exits. A
+// ThreadLocal instance is expected to persist until all threads it has
+// values on have terminated.
+class ThreadLocalRegistryImpl {
+ public:
+ // Registers thread_local_instance as having value on the current thread.
+ // Returns a value that can be used to identify the thread from other threads.
+ static ThreadLocalValueHolderBase* GetValueOnCurrentThread(
+ const ThreadLocalBase* thread_local_instance) {
+#ifdef _MSC_VER
+ MemoryIsNotDeallocated memory_is_not_deallocated;
+#endif // _MSC_VER
+ DWORD current_thread = ::GetCurrentThreadId();
+ MutexLock lock(&mutex_);
+ ThreadIdToThreadLocals* const thread_to_thread_locals =
+ GetThreadLocalsMapLocked();
+ ThreadIdToThreadLocals::iterator thread_local_pos =
+ thread_to_thread_locals->find(current_thread);
+ if (thread_local_pos == thread_to_thread_locals->end()) {
+ thread_local_pos =
+ thread_to_thread_locals
+ ->insert(std::make_pair(current_thread, ThreadLocalValues()))
+ .first;
+ StartWatcherThreadFor(current_thread);
+ }
+ ThreadLocalValues& thread_local_values = thread_local_pos->second;
+ ThreadLocalValues::iterator value_pos =
+ thread_local_values.find(thread_local_instance);
+ if (value_pos == thread_local_values.end()) {
+ value_pos =
+ thread_local_values
+ .insert(std::make_pair(
+ thread_local_instance,
+ std::shared_ptr<ThreadLocalValueHolderBase>(
+ thread_local_instance->NewValueForCurrentThread())))
+ .first;
+ }
+ return value_pos->second.get();
+ }
+
+ static void OnThreadLocalDestroyed(
+ const ThreadLocalBase* thread_local_instance) {
+ std::vector<std::shared_ptr<ThreadLocalValueHolderBase> > value_holders;
+ // Clean up the ThreadLocalValues data structure while holding the lock, but
+ // defer the destruction of the ThreadLocalValueHolderBases.
+ {
+ MutexLock lock(&mutex_);
+ ThreadIdToThreadLocals* const thread_to_thread_locals =
+ GetThreadLocalsMapLocked();
+ for (ThreadIdToThreadLocals::iterator it =
+ thread_to_thread_locals->begin();
+ it != thread_to_thread_locals->end(); ++it) {
+ ThreadLocalValues& thread_local_values = it->second;
+ ThreadLocalValues::iterator value_pos =
+ thread_local_values.find(thread_local_instance);
+ if (value_pos != thread_local_values.end()) {
+ value_holders.push_back(value_pos->second);
+ thread_local_values.erase(value_pos);
+ // This 'if' can only be successful at most once, so theoretically we
+ // could break out of the loop here, but we don't bother doing so.
+ }
+ }
+ }
+ // Outside the lock, let the destructor for 'value_holders' deallocate the
+ // ThreadLocalValueHolderBases.
+ }
+
+ static void OnThreadExit(DWORD thread_id) {
+ GTEST_CHECK_(thread_id != 0) << ::GetLastError();
+ std::vector<std::shared_ptr<ThreadLocalValueHolderBase> > value_holders;
+ // Clean up the ThreadIdToThreadLocals data structure while holding the
+ // lock, but defer the destruction of the ThreadLocalValueHolderBases.
+ {
+ MutexLock lock(&mutex_);
+ ThreadIdToThreadLocals* const thread_to_thread_locals =
+ GetThreadLocalsMapLocked();
+ ThreadIdToThreadLocals::iterator thread_local_pos =
+ thread_to_thread_locals->find(thread_id);
+ if (thread_local_pos != thread_to_thread_locals->end()) {
+ ThreadLocalValues& thread_local_values = thread_local_pos->second;
+ for (ThreadLocalValues::iterator value_pos =
+ thread_local_values.begin();
+ value_pos != thread_local_values.end(); ++value_pos) {
+ value_holders.push_back(value_pos->second);
+ }
+ thread_to_thread_locals->erase(thread_local_pos);
+ }
+ }
+ // Outside the lock, let the destructor for 'value_holders' deallocate the
+ // ThreadLocalValueHolderBases.
+ }
+
+ private:
+ // In a particular thread, maps a ThreadLocal object to its value.
+ typedef std::map<const ThreadLocalBase*,
+ std::shared_ptr<ThreadLocalValueHolderBase> >
+ ThreadLocalValues;
+ // Stores all ThreadIdToThreadLocals having values in a thread, indexed by
+ // thread's ID.
+ typedef std::map<DWORD, ThreadLocalValues> ThreadIdToThreadLocals;
+
+ // Holds the thread id and thread handle that we pass from
+ // StartWatcherThreadFor to WatcherThreadFunc.
+ typedef std::pair<DWORD, HANDLE> ThreadIdAndHandle;
+
+ static void StartWatcherThreadFor(DWORD thread_id) {
+ // The returned handle will be kept in thread_map and closed by
+ // watcher_thread in WatcherThreadFunc.
+ HANDLE thread =
+ ::OpenThread(SYNCHRONIZE | THREAD_QUERY_INFORMATION, FALSE, thread_id);
+ GTEST_CHECK_(thread != nullptr);
+ // We need to pass a valid thread ID pointer into CreateThread for it
+ // to work correctly under Win98.
+ DWORD watcher_thread_id;
+ HANDLE watcher_thread = ::CreateThread(
+ nullptr, // Default security.
+ 0, // Default stack size
+ &ThreadLocalRegistryImpl::WatcherThreadFunc,
+ reinterpret_cast<LPVOID>(new ThreadIdAndHandle(thread_id, thread)),
+ CREATE_SUSPENDED, &watcher_thread_id);
+ GTEST_CHECK_(watcher_thread != nullptr)
+ << "CreateThread failed with error " << ::GetLastError() << ".";
+ // Give the watcher thread the same priority as ours to avoid being
+ // blocked by it.
+ ::SetThreadPriority(watcher_thread,
+ ::GetThreadPriority(::GetCurrentThread()));
+ ::ResumeThread(watcher_thread);
+ ::CloseHandle(watcher_thread);
+ }
+
+ // Monitors exit from a given thread and notifies those
+ // ThreadIdToThreadLocals about thread termination.
+ static DWORD WINAPI WatcherThreadFunc(LPVOID param) {
+ const ThreadIdAndHandle* tah =
+ reinterpret_cast<const ThreadIdAndHandle*>(param);
+ GTEST_CHECK_(::WaitForSingleObject(tah->second, INFINITE) == WAIT_OBJECT_0);
+ OnThreadExit(tah->first);
+ ::CloseHandle(tah->second);
+ delete tah;
+ return 0;
+ }
+
+ // Returns map of thread local instances.
+ static ThreadIdToThreadLocals* GetThreadLocalsMapLocked() {
+ mutex_.AssertHeld();
+#ifdef _MSC_VER
+ MemoryIsNotDeallocated memory_is_not_deallocated;
+#endif // _MSC_VER
+ static ThreadIdToThreadLocals* map = new ThreadIdToThreadLocals();
+ return map;
+ }
+
+ // Protects access to GetThreadLocalsMapLocked() and its return value.
+ static Mutex mutex_;
+ // Protects access to GetThreadMapLocked() and its return value.
+ static Mutex thread_map_mutex_;
+};
+
+Mutex ThreadLocalRegistryImpl::mutex_(Mutex::kStaticMutex); // NOLINT
+Mutex ThreadLocalRegistryImpl::thread_map_mutex_(
+ Mutex::kStaticMutex); // NOLINT
+
+ThreadLocalValueHolderBase* ThreadLocalRegistry::GetValueOnCurrentThread(
+ const ThreadLocalBase* thread_local_instance) {
+ return ThreadLocalRegistryImpl::GetValueOnCurrentThread(
+ thread_local_instance);
+}
+
+void ThreadLocalRegistry::OnThreadLocalDestroyed(
+ const ThreadLocalBase* thread_local_instance) {
+ ThreadLocalRegistryImpl::OnThreadLocalDestroyed(thread_local_instance);
+}
+
+#endif // GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS
+
+#if GTEST_USES_POSIX_RE
+
+// Implements RE. Currently only needed for death tests.
+
+RE::~RE() {
+ if (is_valid_) {
+ // regfree'ing an invalid regex might crash because the content
+ // of the regex is undefined. Since the regex's are essentially
+ // the same, one cannot be valid (or invalid) without the other
+ // being so too.
+ regfree(&partial_regex_);
+ regfree(&full_regex_);
+ }
+ free(const_cast<char*>(pattern_));
+}
+
+// Returns true if and only if regular expression re matches the entire str.
+bool RE::FullMatch(const char* str, const RE& re) {
+ if (!re.is_valid_) return false;
+
+ regmatch_t match;
+ return regexec(&re.full_regex_, str, 1, &match, 0) == 0;
+}
+
+// Returns true if and only if regular expression re matches a substring of
+// str (including str itself).
+bool RE::PartialMatch(const char* str, const RE& re) {
+ if (!re.is_valid_) return false;
+
+ regmatch_t match;
+ return regexec(&re.partial_regex_, str, 1, &match, 0) == 0;
+}
+
+// Initializes an RE from its string representation.
+void RE::Init(const char* regex) {
+ pattern_ = posix::StrDup(regex);
+
+ // Reserves enough bytes to hold the regular expression used for a
+ // full match.
+ const size_t full_regex_len = strlen(regex) + 10;
+ char* const full_pattern = new char[full_regex_len];
+
+ snprintf(full_pattern, full_regex_len, "^(%s)$", regex);
+ is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0;
+ // We want to call regcomp(&partial_regex_, ...) even if the
+ // previous expression returns false. Otherwise partial_regex_ may
+ // not be properly initialized can may cause trouble when it's
+ // freed.
+ //
+ // Some implementation of POSIX regex (e.g. on at least some
+ // versions of Cygwin) doesn't accept the empty string as a valid
+ // regex. We change it to an equivalent form "()" to be safe.
+ if (is_valid_) {
+ const char* const partial_regex = (*regex == '\0') ? "()" : regex;
+ is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
+ }
+ EXPECT_TRUE(is_valid_)
+ << "Regular expression \"" << regex
+ << "\" is not a valid POSIX Extended regular expression.";
+
+ delete[] full_pattern;
+}
+
+#elif GTEST_USES_SIMPLE_RE
+
+// Returns true if and only if ch appears anywhere in str (excluding the
+// terminating '\0' character).
+bool IsInSet(char ch, const char* str) {
+ return ch != '\0' && strchr(str, ch) != nullptr;
+}
+
+// Returns true if and only if ch belongs to the given classification.
+// Unlike similar functions in <ctype.h>, these aren't affected by the
+// current locale.
+bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; }
+bool IsAsciiPunct(char ch) {
+ return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~");
+}
+bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); }
+bool IsAsciiWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); }
+bool IsAsciiWordChar(char ch) {
+ return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') ||
+ ('0' <= ch && ch <= '9') || ch == '_';
+}
+
+// Returns true if and only if "\\c" is a supported escape sequence.
+bool IsValidEscape(char c) {
+ return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW"));
+}
+
+// Returns true if and only if the given atom (specified by escaped and
+// pattern) matches ch. The result is undefined if the atom is invalid.
+bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
+ if (escaped) { // "\\p" where p is pattern_char.
+ switch (pattern_char) {
+ case 'd':
+ return IsAsciiDigit(ch);
+ case 'D':
+ return !IsAsciiDigit(ch);
+ case 'f':
+ return ch == '\f';
+ case 'n':
+ return ch == '\n';
+ case 'r':
+ return ch == '\r';
+ case 's':
+ return IsAsciiWhiteSpace(ch);
+ case 'S':
+ return !IsAsciiWhiteSpace(ch);
+ case 't':
+ return ch == '\t';
+ case 'v':
+ return ch == '\v';
+ case 'w':
+ return IsAsciiWordChar(ch);
+ case 'W':
+ return !IsAsciiWordChar(ch);
+ }
+ return IsAsciiPunct(pattern_char) && pattern_char == ch;
+ }
+
+ return (pattern_char == '.' && ch != '\n') || pattern_char == ch;
+}
+
+// Helper function used by ValidateRegex() to format error messages.
+static std::string FormatRegexSyntaxError(const char* regex, int index) {
+ return (Message() << "Syntax error at index " << index
+ << " in simple regular expression \"" << regex << "\": ")
+ .GetString();
+}
+
+// Generates non-fatal failures and returns false if regex is invalid;
+// otherwise returns true.
+bool ValidateRegex(const char* regex) {
+ if (regex == nullptr) {
+ ADD_FAILURE() << "NULL is not a valid simple regular expression.";
+ return false;
+ }
+
+ bool is_valid = true;
+
+ // True if and only if ?, *, or + can follow the previous atom.
+ bool prev_repeatable = false;
+ for (int i = 0; regex[i]; i++) {
+ if (regex[i] == '\\') { // An escape sequence
+ i++;
+ if (regex[i] == '\0') {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
+ << "'\\' cannot appear at the end.";
+ return false;
+ }
+
+ if (!IsValidEscape(regex[i])) {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
+ << "invalid escape sequence \"\\" << regex[i] << "\".";
+ is_valid = false;
+ }
+ prev_repeatable = true;
+ } else { // Not an escape sequence.
+ const char ch = regex[i];
+
+ if (ch == '^' && i > 0) {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
+ << "'^' can only appear at the beginning.";
+ is_valid = false;
+ } else if (ch == '$' && regex[i + 1] != '\0') {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
+ << "'$' can only appear at the end.";
+ is_valid = false;
+ } else if (IsInSet(ch, "()[]{}|")) {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i) << "'" << ch
+ << "' is unsupported.";
+ is_valid = false;
+ } else if (IsRepeat(ch) && !prev_repeatable) {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i) << "'" << ch
+ << "' can only follow a repeatable token.";
+ is_valid = false;
+ }
+
+ prev_repeatable = !IsInSet(ch, "^$?*+");
+ }
+ }
+
+ return is_valid;
+}
+
+// Matches a repeated regex atom followed by a valid simple regular
+// expression. The regex atom is defined as c if escaped is false,
+// or \c otherwise. repeat is the repetition meta character (?, *,
+// or +). The behavior is undefined if str contains too many
+// characters to be indexable by size_t, in which case the test will
+// probably time out anyway. We are fine with this limitation as
+// std::string has it too.
+bool MatchRepetitionAndRegexAtHead(bool escaped, char c, char repeat,
+ const char* regex, const char* str) {
+ const size_t min_count = (repeat == '+') ? 1 : 0;
+ const size_t max_count = (repeat == '?') ? 1 : static_cast<size_t>(-1) - 1;
+ // We cannot call numeric_limits::max() as it conflicts with the
+ // max() macro on Windows.
+
+ for (size_t i = 0; i <= max_count; ++i) {
+ // We know that the atom matches each of the first i characters in str.
+ if (i >= min_count && MatchRegexAtHead(regex, str + i)) {
+ // We have enough matches at the head, and the tail matches too.
+ // Since we only care about *whether* the pattern matches str
+ // (as opposed to *how* it matches), there is no need to find a
+ // greedy match.
+ return true;
+ }
+ if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i])) return false;
+ }
+ return false;
+}
+
+// Returns true if and only if regex matches a prefix of str. regex must
+// be a valid simple regular expression and not start with "^", or the
+// result is undefined.
+bool MatchRegexAtHead(const char* regex, const char* str) {
+ if (*regex == '\0') // An empty regex matches a prefix of anything.
+ return true;
+
+ // "$" only matches the end of a string. Note that regex being
+ // valid guarantees that there's nothing after "$" in it.
+ if (*regex == '$') return *str == '\0';
+
+ // Is the first thing in regex an escape sequence?
+ const bool escaped = *regex == '\\';
+ if (escaped) ++regex;
+ if (IsRepeat(regex[1])) {
+ // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
+ // here's an indirect recursion. It terminates as the regex gets
+ // shorter in each recursion.
+ return MatchRepetitionAndRegexAtHead(escaped, regex[0], regex[1], regex + 2,
+ str);
+ } else {
+ // regex isn't empty, isn't "$", and doesn't start with a
+ // repetition. We match the first atom of regex with the first
+ // character of str and recurse.
+ return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) &&
+ MatchRegexAtHead(regex + 1, str + 1);
+ }
+}
+
+// Returns true if and only if regex matches any substring of str. regex must
+// be a valid simple regular expression, or the result is undefined.
+//
+// The algorithm is recursive, but the recursion depth doesn't exceed
+// the regex length, so we won't need to worry about running out of
+// stack space normally. In rare cases the time complexity can be
+// exponential with respect to the regex length + the string length,
+// but usually it's must faster (often close to linear).
+bool MatchRegexAnywhere(const char* regex, const char* str) {
+ if (regex == nullptr || str == nullptr) return false;
+
+ if (*regex == '^') return MatchRegexAtHead(regex + 1, str);
+
+ // A successful match can be anywhere in str.
+ do {
+ if (MatchRegexAtHead(regex, str)) return true;
+ } while (*str++ != '\0');
+ return false;
+}
+
+// Implements the RE class.
+
+RE::~RE() {
+ free(const_cast<char*>(pattern_));
+ free(const_cast<char*>(full_pattern_));
+}
+
+// Returns true if and only if regular expression re matches the entire str.
+bool RE::FullMatch(const char* str, const RE& re) {
+ return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
+}
+
+// Returns true if and only if regular expression re matches a substring of
+// str (including str itself).
+bool RE::PartialMatch(const char* str, const RE& re) {
+ return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
+}
+
+// Initializes an RE from its string representation.
+void RE::Init(const char* regex) {
+ pattern_ = full_pattern_ = nullptr;
+ if (regex != nullptr) {
+ pattern_ = posix::StrDup(regex);
+ }
+
+ is_valid_ = ValidateRegex(regex);
+ if (!is_valid_) {
+ // No need to calculate the full pattern when the regex is invalid.
+ return;
+ }
+
+ const size_t len = strlen(regex);
+ // Reserves enough bytes to hold the regular expression used for a
+ // full match: we need space to prepend a '^', append a '$', and
+ // terminate the string with '\0'.
+ char* buffer = static_cast<char*>(malloc(len + 3));
+ full_pattern_ = buffer;
+
+ if (*regex != '^')
+ *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'.
+
+ // We don't use snprintf or strncpy, as they trigger a warning when
+ // compiled with VC++ 8.0.
+ memcpy(buffer, regex, len);
+ buffer += len;
+
+ if (len == 0 || regex[len - 1] != '$')
+ *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'.
+
+ *buffer = '\0';
+}
+
+#endif // GTEST_USES_POSIX_RE
+
+const char kUnknownFile[] = "unknown file";
+
+// Formats a source file path and a line number as they would appear
+// in an error message from the compiler used to compile this code.
+GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
+ const std::string file_name(file == nullptr ? kUnknownFile : file);
+
+ if (line < 0) {
+ return file_name + ":";
+ }
+#ifdef _MSC_VER
+ return file_name + "(" + StreamableToString(line) + "):";
+#else
+ return file_name + ":" + StreamableToString(line) + ":";
+#endif // _MSC_VER
+}
+
+// Formats a file location for compiler-independent XML output.
+// Although this function is not platform dependent, we put it next to
+// FormatFileLocation in order to contrast the two functions.
+// Note that FormatCompilerIndependentFileLocation() does NOT append colon
+// to the file location it produces, unlike FormatFileLocation().
+GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
+ int line) {
+ const std::string file_name(file == nullptr ? kUnknownFile : file);
+
+ if (line < 0)
+ return file_name;
+ else
+ return file_name + ":" + StreamableToString(line);
+}
+
+GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
+ : severity_(severity) {
+ const char* const marker = severity == GTEST_INFO ? "[ INFO ]"
+ : severity == GTEST_WARNING ? "[WARNING]"
+ : severity == GTEST_ERROR ? "[ ERROR ]"
+ : "[ FATAL ]";
+ GetStream() << ::std::endl
+ << marker << " " << FormatFileLocation(file, line).c_str()
+ << ": ";
+}
+
+// Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
+GTestLog::~GTestLog() {
+ GetStream() << ::std::endl;
+ if (severity_ == GTEST_FATAL) {
+ fflush(stderr);
+ posix::Abort();
+ }
+}
+
+// Disable Microsoft deprecation warnings for POSIX functions called from
+// this class (creat, dup, dup2, and close)
+GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
+
+#if GTEST_HAS_STREAM_REDIRECTION
+
+// Object that captures an output stream (stdout/stderr).
+class CapturedStream {
+ public:
+ // The ctor redirects the stream to a temporary file.
+ explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {
+#if GTEST_OS_WINDOWS
+ char temp_dir_path[MAX_PATH + 1] = {'\0'}; // NOLINT
+ char temp_file_path[MAX_PATH + 1] = {'\0'}; // NOLINT
+
+ ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
+ const UINT success = ::GetTempFileNameA(temp_dir_path, "gtest_redir",
+ 0, // Generate unique file name.
+ temp_file_path);
+ GTEST_CHECK_(success != 0)
+ << "Unable to create a temporary file in " << temp_dir_path;
+ const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE);
+ GTEST_CHECK_(captured_fd != -1)
+ << "Unable to open temporary file " << temp_file_path;
+ filename_ = temp_file_path;
+#else
+ // There's no guarantee that a test has write access to the current
+ // directory, so we create the temporary file in a temporary directory.
+ std::string name_template;
+
+#if GTEST_OS_LINUX_ANDROID
+ // Note: Android applications are expected to call the framework's
+ // Context.getExternalStorageDirectory() method through JNI to get
+ // the location of the world-writable SD Card directory. However,
+ // this requires a Context handle, which cannot be retrieved
+ // globally from native code. Doing so also precludes running the
+ // code as part of a regular standalone executable, which doesn't
+ // run in a Dalvik process (e.g. when running it through 'adb shell').
+ //
+ // The location /data/local/tmp is directly accessible from native code.
+ // '/sdcard' and other variants cannot be relied on, as they are not
+ // guaranteed to be mounted, or may have a delay in mounting.
+ name_template = "/data/local/tmp/";
+#elif GTEST_OS_IOS
+ char user_temp_dir[PATH_MAX + 1];
+
+ // Documented alternative to NSTemporaryDirectory() (for obtaining creating
+ // a temporary directory) at
+ // https://developer.apple.com/library/archive/documentation/Security/Conceptual/SecureCodingGuide/Articles/RaceConditions.html#//apple_ref/doc/uid/TP40002585-SW10
+ //
+ // _CS_DARWIN_USER_TEMP_DIR (as well as _CS_DARWIN_USER_CACHE_DIR) is not
+ // documented in the confstr() man page at
+ // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/confstr.3.html#//apple_ref/doc/man/3/confstr
+ // but are still available, according to the WebKit patches at
+ // https://trac.webkit.org/changeset/262004/webkit
+ // https://trac.webkit.org/changeset/263705/webkit
+ //
+ // The confstr() implementation falls back to getenv("TMPDIR"). See
+ // https://opensource.apple.com/source/Libc/Libc-1439.100.3/gen/confstr.c.auto.html
+ ::confstr(_CS_DARWIN_USER_TEMP_DIR, user_temp_dir, sizeof(user_temp_dir));
+
+ name_template = user_temp_dir;
+ if (name_template.back() != GTEST_PATH_SEP_[0])
+ name_template.push_back(GTEST_PATH_SEP_[0]);
+#else
+ name_template = "/tmp/";
+#endif
+ name_template.append("gtest_captured_stream.XXXXXX");
+
+ // mkstemp() modifies the string bytes in place, and does not go beyond the
+ // string's length. This results in well-defined behavior in C++17.
+ //
+ // The const_cast is needed below C++17. The constraints on std::string
+ // implementations in C++11 and above make assumption behind the const_cast
+ // fairly safe.
+ const int captured_fd = ::mkstemp(const_cast<char*>(name_template.data()));
+ if (captured_fd == -1) {
+ GTEST_LOG_(WARNING)
+ << "Failed to create tmp file " << name_template
+ << " for test; does the test have access to the /tmp directory?";
+ }
+ filename_ = std::move(name_template);
+#endif // GTEST_OS_WINDOWS
+ fflush(nullptr);
+ dup2(captured_fd, fd_);
+ close(captured_fd);
+ }
+
+ ~CapturedStream() { remove(filename_.c_str()); }
+
+ std::string GetCapturedString() {
+ if (uncaptured_fd_ != -1) {
+ // Restores the original stream.
+ fflush(nullptr);
+ dup2(uncaptured_fd_, fd_);
+ close(uncaptured_fd_);
+ uncaptured_fd_ = -1;
+ }
+
+ FILE* const file = posix::FOpen(filename_.c_str(), "r");
+ if (file == nullptr) {
+ GTEST_LOG_(FATAL) << "Failed to open tmp file " << filename_
+ << " for capturing stream.";
+ }
+ const std::string content = ReadEntireFile(file);
+ posix::FClose(file);
+ return content;
+ }
+
+ private:
+ const int fd_; // A stream to capture.
+ int uncaptured_fd_;
+ // Name of the temporary file holding the stderr output.
+ ::std::string filename_;
+
+ CapturedStream(const CapturedStream&) = delete;
+ CapturedStream& operator=(const CapturedStream&) = delete;
+};
+
+GTEST_DISABLE_MSC_DEPRECATED_POP_()
+
+static CapturedStream* g_captured_stderr = nullptr;
+static CapturedStream* g_captured_stdout = nullptr;
+
+// Starts capturing an output stream (stdout/stderr).
+static void CaptureStream(int fd, const char* stream_name,
+ CapturedStream** stream) {
+ if (*stream != nullptr) {
+ GTEST_LOG_(FATAL) << "Only one " << stream_name
+ << " capturer can exist at a time.";
+ }
+ *stream = new CapturedStream(fd);
+}
+
+// Stops capturing the output stream and returns the captured string.
+static std::string GetCapturedStream(CapturedStream** captured_stream) {
+ const std::string content = (*captured_stream)->GetCapturedString();
+
+ delete *captured_stream;
+ *captured_stream = nullptr;
+
+ return content;
+}
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+// MSVC and C++Builder do not provide a definition of STDERR_FILENO.
+const int kStdOutFileno = 1;
+const int kStdErrFileno = 2;
+#else
+const int kStdOutFileno = STDOUT_FILENO;
+const int kStdErrFileno = STDERR_FILENO;
+#endif // defined(_MSC_VER) || defined(__BORLANDC__)
+
+// Starts capturing stdout.
+void CaptureStdout() {
+ CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
+}
+
+// Starts capturing stderr.
+void CaptureStderr() {
+ CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr);
+}
+
+// Stops capturing stdout and returns the captured string.
+std::string GetCapturedStdout() {
+ return GetCapturedStream(&g_captured_stdout);
+}
+
+// Stops capturing stderr and returns the captured string.
+std::string GetCapturedStderr() {
+ return GetCapturedStream(&g_captured_stderr);
+}
+
+#endif // GTEST_HAS_STREAM_REDIRECTION
+
+size_t GetFileSize(FILE* file) {
+ fseek(file, 0, SEEK_END);
+ return static_cast<size_t>(ftell(file));
+}
+
+std::string ReadEntireFile(FILE* file) {
+ const size_t file_size = GetFileSize(file);
+ char* const buffer = new char[file_size];
+
+ size_t bytes_last_read = 0; // # of bytes read in the last fread()
+ size_t bytes_read = 0; // # of bytes read so far
+
+ fseek(file, 0, SEEK_SET);
+
+ // Keeps reading the file until we cannot read further or the
+ // pre-determined file size is reached.
+ do {
+ bytes_last_read =
+ fread(buffer + bytes_read, 1, file_size - bytes_read, file);
+ bytes_read += bytes_last_read;
+ } while (bytes_last_read > 0 && bytes_read < file_size);
+
+ const std::string content(buffer, bytes_read);
+ delete[] buffer;
+
+ return content;
+}
+
+#if GTEST_HAS_DEATH_TEST
+static const std::vector<std::string>* g_injected_test_argvs =
+ nullptr; // Owned.
+
+std::vector<std::string> GetInjectableArgvs() {
+ if (g_injected_test_argvs != nullptr) {
+ return *g_injected_test_argvs;
+ }
+ return GetArgvs();
+}
+
+void SetInjectableArgvs(const std::vector<std::string>* new_argvs) {
+ if (g_injected_test_argvs != new_argvs) delete g_injected_test_argvs;
+ g_injected_test_argvs = new_argvs;
+}
+
+void SetInjectableArgvs(const std::vector<std::string>& new_argvs) {
+ SetInjectableArgvs(
+ new std::vector<std::string>(new_argvs.begin(), new_argvs.end()));
+}
+
+void ClearInjectableArgvs() {
+ delete g_injected_test_argvs;
+ g_injected_test_argvs = nullptr;
+}
+#endif // GTEST_HAS_DEATH_TEST
+
+#if GTEST_OS_WINDOWS_MOBILE
+namespace posix {
+void Abort() {
+ DebugBreak();
+ TerminateProcess(GetCurrentProcess(), 1);
+}
+} // namespace posix
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+// Returns the name of the environment variable corresponding to the
+// given flag. For example, FlagToEnvVar("foo") will return
+// "GTEST_FOO" in the open-source version.
+static std::string FlagToEnvVar(const char* flag) {
+ const std::string full_flag =
+ (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
+
+ Message env_var;
+ for (size_t i = 0; i != full_flag.length(); i++) {
+ env_var << ToUpper(full_flag.c_str()[i]);
+ }
+
+ return env_var.GetString();
+}
+
+// Parses 'str' for a 32-bit signed integer. If successful, writes
+// the result to *value and returns true; otherwise leaves *value
+// unchanged and returns false.
+bool ParseInt32(const Message& src_text, const char* str, int32_t* value) {
+ // Parses the environment variable as a decimal integer.
+ char* end = nullptr;
+ const long long_value = strtol(str, &end, 10); // NOLINT
+
+ // Has strtol() consumed all characters in the string?
+ if (*end != '\0') {
+ // No - an invalid character was encountered.
+ Message msg;
+ msg << "WARNING: " << src_text
+ << " is expected to be a 32-bit integer, but actually"
+ << " has value \"" << str << "\".\n";
+ printf("%s", msg.GetString().c_str());
+ fflush(stdout);
+ return false;
+ }
+
+ // Is the parsed value in the range of an int32_t?
+ const auto result = static_cast<int32_t>(long_value);
+ if (long_value == LONG_MAX || long_value == LONG_MIN ||
+ // The parsed value overflows as a long. (strtol() returns
+ // LONG_MAX or LONG_MIN when the input overflows.)
+ result != long_value
+ // The parsed value overflows as an int32_t.
+ ) {
+ Message msg;
+ msg << "WARNING: " << src_text
+ << " is expected to be a 32-bit integer, but actually"
+ << " has value " << str << ", which overflows.\n";
+ printf("%s", msg.GetString().c_str());
+ fflush(stdout);
+ return false;
+ }
+
+ *value = result;
+ return true;
+}
+
+// Reads and returns the Boolean environment variable corresponding to
+// the given flag; if it's not set, returns default_value.
+//
+// The value is considered true if and only if it's not "0".
+bool BoolFromGTestEnv(const char* flag, bool default_value) {
+#if defined(GTEST_GET_BOOL_FROM_ENV_)
+ return GTEST_GET_BOOL_FROM_ENV_(flag, default_value);
+#else
+ const std::string env_var = FlagToEnvVar(flag);
+ const char* const string_value = posix::GetEnv(env_var.c_str());
+ return string_value == nullptr ? default_value
+ : strcmp(string_value, "0") != 0;
+#endif // defined(GTEST_GET_BOOL_FROM_ENV_)
+}
+
+// Reads and returns a 32-bit integer stored in the environment
+// variable corresponding to the given flag; if it isn't set or
+// doesn't represent a valid 32-bit integer, returns default_value.
+int32_t Int32FromGTestEnv(const char* flag, int32_t default_value) {
+#if defined(GTEST_GET_INT32_FROM_ENV_)
+ return GTEST_GET_INT32_FROM_ENV_(flag, default_value);
+#else
+ const std::string env_var = FlagToEnvVar(flag);
+ const char* const string_value = posix::GetEnv(env_var.c_str());
+ if (string_value == nullptr) {
+ // The environment variable is not set.
+ return default_value;
+ }
+
+ int32_t result = default_value;
+ if (!ParseInt32(Message() << "Environment variable " << env_var, string_value,
+ &result)) {
+ printf("The default value %s is used.\n",
+ (Message() << default_value).GetString().c_str());
+ fflush(stdout);
+ return default_value;
+ }
+
+ return result;
+#endif // defined(GTEST_GET_INT32_FROM_ENV_)
+}
+
+// As a special case for the 'output' flag, if GTEST_OUTPUT is not
+// set, we look for XML_OUTPUT_FILE, which is set by the Bazel build
+// system. The value of XML_OUTPUT_FILE is a filename without the
+// "xml:" prefix of GTEST_OUTPUT.
+// Note that this is meant to be called at the call site so it does
+// not check that the flag is 'output'
+// In essence this checks an env variable called XML_OUTPUT_FILE
+// and if it is set we prepend "xml:" to its value, if it not set we return ""
+std::string OutputFlagAlsoCheckEnvVar() {
+ std::string default_value_for_output_flag = "";
+ const char* xml_output_file_env = posix::GetEnv("XML_OUTPUT_FILE");
+ if (nullptr != xml_output_file_env) {
+ default_value_for_output_flag = std::string("xml:") + xml_output_file_env;
+ }
+ return default_value_for_output_flag;
+}
+
+// Reads and returns the string environment variable corresponding to
+// the given flag; if it's not set, returns default_value.
+const char* StringFromGTestEnv(const char* flag, const char* default_value) {
+#if defined(GTEST_GET_STRING_FROM_ENV_)
+ return GTEST_GET_STRING_FROM_ENV_(flag, default_value);
+#else
+ const std::string env_var = FlagToEnvVar(flag);
+ const char* const value = posix::GetEnv(env_var.c_str());
+ return value == nullptr ? default_value : value;
+#endif // defined(GTEST_GET_STRING_FROM_ENV_)
+}
+
+} // namespace internal
+} // namespace testing
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-printers.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-printers.cc
new file mode 100644
index 0000000000..f3976d230d
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-printers.cc
@@ -0,0 +1,553 @@
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Google Test - The Google C++ Testing and Mocking Framework
+//
+// This file implements a universal value printer that can print a
+// value of any type T:
+//
+// void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
+//
+// It uses the << operator when possible, and prints the bytes in the
+// object otherwise. A user can override its behavior for a class
+// type Foo by defining either operator<<(::std::ostream&, const Foo&)
+// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that
+// defines Foo.
+
+#include "gtest/gtest-printers.h"
+
+#include <stdio.h>
+
+#include <cctype>
+#include <cstdint>
+#include <cwchar>
+#include <ostream> // NOLINT
+#include <string>
+#include <type_traits>
+
+#include "gtest/internal/gtest-port.h"
+#include "src/gtest-internal-inl.h"
+
+namespace testing {
+
+namespace {
+
+using ::std::ostream;
+
+// Prints a segment of bytes in the given object.
+GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
+void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start,
+ size_t count, ostream* os) {
+ char text[5] = "";
+ for (size_t i = 0; i != count; i++) {
+ const size_t j = start + i;
+ if (i != 0) {
+ // Organizes the bytes into groups of 2 for easy parsing by
+ // human.
+ if ((j % 2) == 0)
+ *os << ' ';
+ else
+ *os << '-';
+ }
+ GTEST_SNPRINTF_(text, sizeof(text), "%02X", obj_bytes[j]);
+ *os << text;
+ }
+}
+
+// Prints the bytes in the given value to the given ostream.
+void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
+ ostream* os) {
+ // Tells the user how big the object is.
+ *os << count << "-byte object <";
+
+ const size_t kThreshold = 132;
+ const size_t kChunkSize = 64;
+ // If the object size is bigger than kThreshold, we'll have to omit
+ // some details by printing only the first and the last kChunkSize
+ // bytes.
+ if (count < kThreshold) {
+ PrintByteSegmentInObjectTo(obj_bytes, 0, count, os);
+ } else {
+ PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os);
+ *os << " ... ";
+ // Rounds up to 2-byte boundary.
+ const size_t resume_pos = (count - kChunkSize + 1) / 2 * 2;
+ PrintByteSegmentInObjectTo(obj_bytes, resume_pos, count - resume_pos, os);
+ }
+ *os << ">";
+}
+
+// Helpers for widening a character to char32_t. Since the standard does not
+// specify if char / wchar_t is signed or unsigned, it is important to first
+// convert it to the unsigned type of the same width before widening it to
+// char32_t.
+template <typename CharType>
+char32_t ToChar32(CharType in) {
+ return static_cast<char32_t>(
+ static_cast<typename std::make_unsigned<CharType>::type>(in));
+}
+
+} // namespace
+
+namespace internal {
+
+// Delegates to PrintBytesInObjectToImpl() to print the bytes in the
+// given object. The delegation simplifies the implementation, which
+// uses the << operator and thus is easier done outside of the
+// ::testing::internal namespace, which contains a << operator that
+// sometimes conflicts with the one in STL.
+void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
+ ostream* os) {
+ PrintBytesInObjectToImpl(obj_bytes, count, os);
+}
+
+// Depending on the value of a char (or wchar_t), we print it in one
+// of three formats:
+// - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
+// - as a hexadecimal escape sequence (e.g. '\x7F'), or
+// - as a special escape sequence (e.g. '\r', '\n').
+enum CharFormat { kAsIs, kHexEscape, kSpecialEscape };
+
+// Returns true if c is a printable ASCII character. We test the
+// value of c directly instead of calling isprint(), which is buggy on
+// Windows Mobile.
+inline bool IsPrintableAscii(char32_t c) { return 0x20 <= c && c <= 0x7E; }
+
+// Prints c (of type char, char8_t, char16_t, char32_t, or wchar_t) as a
+// character literal without the quotes, escaping it when necessary; returns how
+// c was formatted.
+template <typename Char>
+static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
+ const char32_t u_c = ToChar32(c);
+ switch (u_c) {
+ case L'\0':
+ *os << "\\0";
+ break;
+ case L'\'':
+ *os << "\\'";
+ break;
+ case L'\\':
+ *os << "\\\\";
+ break;
+ case L'\a':
+ *os << "\\a";
+ break;
+ case L'\b':
+ *os << "\\b";
+ break;
+ case L'\f':
+ *os << "\\f";
+ break;
+ case L'\n':
+ *os << "\\n";
+ break;
+ case L'\r':
+ *os << "\\r";
+ break;
+ case L'\t':
+ *os << "\\t";
+ break;
+ case L'\v':
+ *os << "\\v";
+ break;
+ default:
+ if (IsPrintableAscii(u_c)) {
+ *os << static_cast<char>(c);
+ return kAsIs;
+ } else {
+ ostream::fmtflags flags = os->flags();
+ *os << "\\x" << std::hex << std::uppercase << static_cast<int>(u_c);
+ os->flags(flags);
+ return kHexEscape;
+ }
+ }
+ return kSpecialEscape;
+}
+
+// Prints a char32_t c as if it's part of a string literal, escaping it when
+// necessary; returns how c was formatted.
+static CharFormat PrintAsStringLiteralTo(char32_t c, ostream* os) {
+ switch (c) {
+ case L'\'':
+ *os << "'";
+ return kAsIs;
+ case L'"':
+ *os << "\\\"";
+ return kSpecialEscape;
+ default:
+ return PrintAsCharLiteralTo(c, os);
+ }
+}
+
+static const char* GetCharWidthPrefix(char) { return ""; }
+
+static const char* GetCharWidthPrefix(signed char) { return ""; }
+
+static const char* GetCharWidthPrefix(unsigned char) { return ""; }
+
+#ifdef __cpp_char8_t
+static const char* GetCharWidthPrefix(char8_t) { return "u8"; }
+#endif
+
+static const char* GetCharWidthPrefix(char16_t) { return "u"; }
+
+static const char* GetCharWidthPrefix(char32_t) { return "U"; }
+
+static const char* GetCharWidthPrefix(wchar_t) { return "L"; }
+
+// Prints a char c as if it's part of a string literal, escaping it when
+// necessary; returns how c was formatted.
+static CharFormat PrintAsStringLiteralTo(char c, ostream* os) {
+ return PrintAsStringLiteralTo(ToChar32(c), os);
+}
+
+#ifdef __cpp_char8_t
+static CharFormat PrintAsStringLiteralTo(char8_t c, ostream* os) {
+ return PrintAsStringLiteralTo(ToChar32(c), os);
+}
+#endif
+
+static CharFormat PrintAsStringLiteralTo(char16_t c, ostream* os) {
+ return PrintAsStringLiteralTo(ToChar32(c), os);
+}
+
+static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
+ return PrintAsStringLiteralTo(ToChar32(c), os);
+}
+
+// Prints a character c (of type char, char8_t, char16_t, char32_t, or wchar_t)
+// and its code. '\0' is printed as "'\\0'", other unprintable characters are
+// also properly escaped using the standard C++ escape sequence.
+template <typename Char>
+void PrintCharAndCodeTo(Char c, ostream* os) {
+ // First, print c as a literal in the most readable form we can find.
+ *os << GetCharWidthPrefix(c) << "'";
+ const CharFormat format = PrintAsCharLiteralTo(c, os);
+ *os << "'";
+
+ // To aid user debugging, we also print c's code in decimal, unless
+ // it's 0 (in which case c was printed as '\\0', making the code
+ // obvious).
+ if (c == 0) return;
+ *os << " (" << static_cast<int>(c);
+
+ // For more convenience, we print c's code again in hexadecimal,
+ // unless c was already printed in the form '\x##' or the code is in
+ // [1, 9].
+ if (format == kHexEscape || (1 <= c && c <= 9)) {
+ // Do nothing.
+ } else {
+ *os << ", 0x" << String::FormatHexInt(static_cast<int>(c));
+ }
+ *os << ")";
+}
+
+void PrintTo(unsigned char c, ::std::ostream* os) { PrintCharAndCodeTo(c, os); }
+void PrintTo(signed char c, ::std::ostream* os) { PrintCharAndCodeTo(c, os); }
+
+// Prints a wchar_t as a symbol if it is printable or as its internal
+// code otherwise and also as its code. L'\0' is printed as "L'\\0'".
+void PrintTo(wchar_t wc, ostream* os) { PrintCharAndCodeTo(wc, os); }
+
+// TODO(dcheng): Consider making this delegate to PrintCharAndCodeTo() as well.
+void PrintTo(char32_t c, ::std::ostream* os) {
+ *os << std::hex << "U+" << std::uppercase << std::setfill('0') << std::setw(4)
+ << static_cast<uint32_t>(c);
+}
+
+// gcc/clang __{u,}int128_t
+#if defined(__SIZEOF_INT128__)
+void PrintTo(__uint128_t v, ::std::ostream* os) {
+ if (v == 0) {
+ *os << "0";
+ return;
+ }
+
+ // Buffer large enough for ceil(log10(2^128))==39 and the null terminator
+ char buf[40];
+ char* p = buf + sizeof(buf);
+
+ // Some configurations have a __uint128_t, but no support for built in
+ // division. Do manual long division instead.
+
+ uint64_t high = static_cast<uint64_t>(v >> 64);
+ uint64_t low = static_cast<uint64_t>(v);
+
+ *--p = 0;
+ while (high != 0 || low != 0) {
+ uint64_t high_mod = high % 10;
+ high = high / 10;
+ // This is the long division algorithm specialized for a divisor of 10 and
+ // only two elements.
+ // Notable values:
+ // 2^64 / 10 == 1844674407370955161
+ // 2^64 % 10 == 6
+ const uint64_t carry = 6 * high_mod + low % 10;
+ low = low / 10 + high_mod * 1844674407370955161 + carry / 10;
+
+ char digit = static_cast<char>(carry % 10);
+ *--p = '0' + digit;
+ }
+ *os << p;
+}
+void PrintTo(__int128_t v, ::std::ostream* os) {
+ __uint128_t uv = static_cast<__uint128_t>(v);
+ if (v < 0) {
+ *os << "-";
+ uv = -uv;
+ }
+ PrintTo(uv, os);
+}
+#endif // __SIZEOF_INT128__
+
+// Prints the given array of characters to the ostream. CharType must be either
+// char, char8_t, char16_t, char32_t, or wchar_t.
+// The array starts at begin, the length is len, it may include '\0' characters
+// and may not be NUL-terminated.
+template <typename CharType>
+GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+ GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+ GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ static CharFormat
+ PrintCharsAsStringTo(const CharType* begin, size_t len, ostream* os) {
+ const char* const quote_prefix = GetCharWidthPrefix(*begin);
+ *os << quote_prefix << "\"";
+ bool is_previous_hex = false;
+ CharFormat print_format = kAsIs;
+ for (size_t index = 0; index < len; ++index) {
+ const CharType cur = begin[index];
+ if (is_previous_hex && IsXDigit(cur)) {
+ // Previous character is of '\x..' form and this character can be
+ // interpreted as another hexadecimal digit in its number. Break string to
+ // disambiguate.
+ *os << "\" " << quote_prefix << "\"";
+ }
+ is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape;
+ // Remember if any characters required hex escaping.
+ if (is_previous_hex) {
+ print_format = kHexEscape;
+ }
+ }
+ *os << "\"";
+ return print_format;
+}
+
+// Prints a (const) char/wchar_t array of 'len' elements, starting at address
+// 'begin'. CharType must be either char or wchar_t.
+template <typename CharType>
+GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
+ GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
+ GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ static void
+ UniversalPrintCharArray(const CharType* begin, size_t len,
+ ostream* os) {
+ // The code
+ // const char kFoo[] = "foo";
+ // generates an array of 4, not 3, elements, with the last one being '\0'.
+ //
+ // Therefore when printing a char array, we don't print the last element if
+ // it's '\0', such that the output matches the string literal as it's
+ // written in the source code.
+ if (len > 0 && begin[len - 1] == '\0') {
+ PrintCharsAsStringTo(begin, len - 1, os);
+ return;
+ }
+
+ // If, however, the last element in the array is not '\0', e.g.
+ // const char kFoo[] = { 'f', 'o', 'o' };
+ // we must print the entire array. We also print a message to indicate
+ // that the array is not NUL-terminated.
+ PrintCharsAsStringTo(begin, len, os);
+ *os << " (no terminating NUL)";
+}
+
+// Prints a (const) char array of 'len' elements, starting at address 'begin'.
+void UniversalPrintArray(const char* begin, size_t len, ostream* os) {
+ UniversalPrintCharArray(begin, len, os);
+}
+
+#ifdef __cpp_char8_t
+// Prints a (const) char8_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const char8_t* begin, size_t len, ostream* os) {
+ UniversalPrintCharArray(begin, len, os);
+}
+#endif
+
+// Prints a (const) char16_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const char16_t* begin, size_t len, ostream* os) {
+ UniversalPrintCharArray(begin, len, os);
+}
+
+// Prints a (const) char32_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const char32_t* begin, size_t len, ostream* os) {
+ UniversalPrintCharArray(begin, len, os);
+}
+
+// Prints a (const) wchar_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) {
+ UniversalPrintCharArray(begin, len, os);
+}
+
+namespace {
+
+// Prints a null-terminated C-style string to the ostream.
+template <typename Char>
+void PrintCStringTo(const Char* s, ostream* os) {
+ if (s == nullptr) {
+ *os << "NULL";
+ } else {
+ *os << ImplicitCast_<const void*>(s) << " pointing to ";
+ PrintCharsAsStringTo(s, std::char_traits<Char>::length(s), os);
+ }
+}
+
+} // anonymous namespace
+
+void PrintTo(const char* s, ostream* os) { PrintCStringTo(s, os); }
+
+#ifdef __cpp_char8_t
+void PrintTo(const char8_t* s, ostream* os) { PrintCStringTo(s, os); }
+#endif
+
+void PrintTo(const char16_t* s, ostream* os) { PrintCStringTo(s, os); }
+
+void PrintTo(const char32_t* s, ostream* os) { PrintCStringTo(s, os); }
+
+// MSVC compiler can be configured to define whar_t as a typedef
+// of unsigned short. Defining an overload for const wchar_t* in that case
+// would cause pointers to unsigned shorts be printed as wide strings,
+// possibly accessing more memory than intended and causing invalid
+// memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when
+// wchar_t is implemented as a native type.
+#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
+// Prints the given wide C string to the ostream.
+void PrintTo(const wchar_t* s, ostream* os) { PrintCStringTo(s, os); }
+#endif // wchar_t is native
+
+namespace {
+
+bool ContainsUnprintableControlCodes(const char* str, size_t length) {
+ const unsigned char* s = reinterpret_cast<const unsigned char*>(str);
+
+ for (size_t i = 0; i < length; i++) {
+ unsigned char ch = *s++;
+ if (std::iscntrl(ch)) {
+ switch (ch) {
+ case '\t':
+ case '\n':
+ case '\r':
+ break;
+ default:
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool IsUTF8TrailByte(unsigned char t) { return 0x80 <= t && t <= 0xbf; }
+
+bool IsValidUTF8(const char* str, size_t length) {
+ const unsigned char* s = reinterpret_cast<const unsigned char*>(str);
+
+ for (size_t i = 0; i < length;) {
+ unsigned char lead = s[i++];
+
+ if (lead <= 0x7f) {
+ continue; // single-byte character (ASCII) 0..7F
+ }
+ if (lead < 0xc2) {
+ return false; // trail byte or non-shortest form
+ } else if (lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i])) {
+ ++i; // 2-byte character
+ } else if (0xe0 <= lead && lead <= 0xef && (i + 2) <= length &&
+ IsUTF8TrailByte(s[i]) && IsUTF8TrailByte(s[i + 1]) &&
+ // check for non-shortest form and surrogate
+ (lead != 0xe0 || s[i] >= 0xa0) &&
+ (lead != 0xed || s[i] < 0xa0)) {
+ i += 2; // 3-byte character
+ } else if (0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length &&
+ IsUTF8TrailByte(s[i]) && IsUTF8TrailByte(s[i + 1]) &&
+ IsUTF8TrailByte(s[i + 2]) &&
+ // check for non-shortest form
+ (lead != 0xf0 || s[i] >= 0x90) &&
+ (lead != 0xf4 || s[i] < 0x90)) {
+ i += 3; // 4-byte character
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+void ConditionalPrintAsText(const char* str, size_t length, ostream* os) {
+ if (!ContainsUnprintableControlCodes(str, length) &&
+ IsValidUTF8(str, length)) {
+ *os << "\n As Text: \"" << str << "\"";
+ }
+}
+
+} // anonymous namespace
+
+void PrintStringTo(const ::std::string& s, ostream* os) {
+ if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) {
+ if (GTEST_FLAG_GET(print_utf8)) {
+ ConditionalPrintAsText(s.data(), s.size(), os);
+ }
+ }
+}
+
+#ifdef __cpp_char8_t
+void PrintU8StringTo(const ::std::u8string& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif
+
+void PrintU16StringTo(const ::std::u16string& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+
+void PrintU32StringTo(const ::std::u32string& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+
+#if GTEST_HAS_STD_WSTRING
+void PrintWideStringTo(const ::std::wstring& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif // GTEST_HAS_STD_WSTRING
+
+} // namespace internal
+
+} // namespace testing
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-test-part.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-test-part.cc
new file mode 100644
index 0000000000..eb7c8d1cf9
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-test-part.cc
@@ -0,0 +1,105 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+
+#include "gtest/gtest-test-part.h"
+
+#include "gtest/internal/gtest-port.h"
+#include "src/gtest-internal-inl.h"
+
+namespace testing {
+
+using internal::GetUnitTestImpl;
+
+// Gets the summary of the failure message by omitting the stack trace
+// in it.
+std::string TestPartResult::ExtractSummary(const char* message) {
+ const char* const stack_trace = strstr(message, internal::kStackTraceMarker);
+ return stack_trace == nullptr ? message : std::string(message, stack_trace);
+}
+
+// Prints a TestPartResult object.
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
+ return os << internal::FormatFileLocation(result.file_name(),
+ result.line_number())
+ << " "
+ << (result.type() == TestPartResult::kSuccess ? "Success"
+ : result.type() == TestPartResult::kSkip ? "Skipped"
+ : result.type() == TestPartResult::kFatalFailure
+ ? "Fatal failure"
+ : "Non-fatal failure")
+ << ":\n"
+ << result.message() << std::endl;
+}
+
+// Appends a TestPartResult to the array.
+void TestPartResultArray::Append(const TestPartResult& result) {
+ array_.push_back(result);
+}
+
+// Returns the TestPartResult at the given index (0-based).
+const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const {
+ if (index < 0 || index >= size()) {
+ printf("\nInvalid index (%d) into TestPartResultArray.\n", index);
+ internal::posix::Abort();
+ }
+
+ return array_[static_cast<size_t>(index)];
+}
+
+// Returns the number of TestPartResult objects in the array.
+int TestPartResultArray::size() const {
+ return static_cast<int>(array_.size());
+}
+
+namespace internal {
+
+HasNewFatalFailureHelper::HasNewFatalFailureHelper()
+ : has_new_fatal_failure_(false),
+ original_reporter_(
+ GetUnitTestImpl()->GetTestPartResultReporterForCurrentThread()) {
+ GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this);
+}
+
+HasNewFatalFailureHelper::~HasNewFatalFailureHelper() {
+ GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(
+ original_reporter_);
+}
+
+void HasNewFatalFailureHelper::ReportTestPartResult(
+ const TestPartResult& result) {
+ if (result.fatally_failed()) has_new_fatal_failure_ = true;
+ original_reporter_->ReportTestPartResult(result);
+}
+
+} // namespace internal
+
+} // namespace testing
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-typed-test.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-typed-test.cc
new file mode 100644
index 0000000000..a2828b83c6
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-typed-test.cc
@@ -0,0 +1,104 @@
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "gtest/gtest-typed-test.h"
+
+#include "gtest/gtest.h"
+
+namespace testing {
+namespace internal {
+
+// Skips to the first non-space char in str. Returns an empty string if str
+// contains only whitespace characters.
+static const char* SkipSpaces(const char* str) {
+ while (IsSpace(*str)) str++;
+ return str;
+}
+
+static std::vector<std::string> SplitIntoTestNames(const char* src) {
+ std::vector<std::string> name_vec;
+ src = SkipSpaces(src);
+ for (; src != nullptr; src = SkipComma(src)) {
+ name_vec.push_back(StripTrailingSpaces(GetPrefixUntilComma(src)));
+ }
+ return name_vec;
+}
+
+// Verifies that registered_tests match the test names in
+// registered_tests_; returns registered_tests if successful, or
+// aborts the program otherwise.
+const char* TypedTestSuitePState::VerifyRegisteredTestNames(
+ const char* test_suite_name, const char* file, int line,
+ const char* registered_tests) {
+ RegisterTypeParameterizedTestSuite(test_suite_name, CodeLocation(file, line));
+
+ typedef RegisteredTestsMap::const_iterator RegisteredTestIter;
+ registered_ = true;
+
+ std::vector<std::string> name_vec = SplitIntoTestNames(registered_tests);
+
+ Message errors;
+
+ std::set<std::string> tests;
+ for (std::vector<std::string>::const_iterator name_it = name_vec.begin();
+ name_it != name_vec.end(); ++name_it) {
+ const std::string& name = *name_it;
+ if (tests.count(name) != 0) {
+ errors << "Test " << name << " is listed more than once.\n";
+ continue;
+ }
+
+ if (registered_tests_.count(name) != 0) {
+ tests.insert(name);
+ } else {
+ errors << "No test named " << name
+ << " can be found in this test suite.\n";
+ }
+ }
+
+ for (RegisteredTestIter it = registered_tests_.begin();
+ it != registered_tests_.end(); ++it) {
+ if (tests.count(it->first) == 0) {
+ errors << "You forgot to list test " << it->first << ".\n";
+ }
+ }
+
+ const std::string& errors_str = errors.GetString();
+ if (errors_str != "") {
+ fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
+ errors_str.c_str());
+ fflush(stderr);
+ posix::Abort();
+ }
+
+ return registered_tests;
+}
+
+} // namespace internal
+} // namespace testing
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest.cc
new file mode 100644
index 0000000000..6f31dd2260
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest.cc
@@ -0,0 +1,6795 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+
+#include "gtest/gtest.h"
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include <algorithm>
+#include <chrono> // NOLINT
+#include <cmath>
+#include <cstdint>
+#include <initializer_list>
+#include <iomanip>
+#include <iterator>
+#include <limits>
+#include <list>
+#include <map>
+#include <ostream> // NOLINT
+#include <sstream>
+#include <unordered_set>
+#include <vector>
+
+#include "gtest/gtest-assertion-result.h"
+#include "gtest/gtest-spi.h"
+#include "gtest/internal/custom/gtest.h"
+
+#if GTEST_OS_LINUX
+
+#include <fcntl.h> // NOLINT
+#include <limits.h> // NOLINT
+#include <sched.h> // NOLINT
+// Declares vsnprintf(). This header is not available on Windows.
+#include <strings.h> // NOLINT
+#include <sys/mman.h> // NOLINT
+#include <sys/time.h> // NOLINT
+#include <unistd.h> // NOLINT
+
+#include <string>
+
+#elif GTEST_OS_ZOS
+#include <sys/time.h> // NOLINT
+
+// On z/OS we additionally need strings.h for strcasecmp.
+#include <strings.h> // NOLINT
+
+#elif GTEST_OS_WINDOWS_MOBILE // We are on Windows CE.
+
+#include <windows.h> // NOLINT
+#undef min
+
+#elif GTEST_OS_WINDOWS // We are on Windows proper.
+
+#include <windows.h> // NOLINT
+#undef min
+
+#ifdef _MSC_VER
+#include <crtdbg.h> // NOLINT
+#endif
+
+#include <io.h> // NOLINT
+#include <sys/stat.h> // NOLINT
+#include <sys/timeb.h> // NOLINT
+#include <sys/types.h> // NOLINT
+
+#if GTEST_OS_WINDOWS_MINGW
+#include <sys/time.h> // NOLINT
+#endif // GTEST_OS_WINDOWS_MINGW
+
+#else
+
+// cpplint thinks that the header is already included, so we want to
+// silence it.
+#include <sys/time.h> // NOLINT
+#include <unistd.h> // NOLINT
+
+#endif // GTEST_OS_LINUX
+
+#if GTEST_HAS_EXCEPTIONS
+#include <stdexcept>
+#endif
+
+#if GTEST_CAN_STREAM_RESULTS_
+#include <arpa/inet.h> // NOLINT
+#include <netdb.h> // NOLINT
+#include <sys/socket.h> // NOLINT
+#include <sys/types.h> // NOLINT
+#endif
+
+#include "src/gtest-internal-inl.h"
+
+#if GTEST_OS_WINDOWS
+#define vsnprintf _vsnprintf
+#endif // GTEST_OS_WINDOWS
+
+#if GTEST_OS_MAC
+#ifndef GTEST_OS_IOS
+#include <crt_externs.h>
+#endif
+#endif
+
+#if GTEST_HAS_ABSL
+#include "absl/debugging/failure_signal_handler.h"
+#include "absl/debugging/stacktrace.h"
+#include "absl/debugging/symbolize.h"
+#include "absl/flags/parse.h"
+#include "absl/flags/usage.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_replace.h"
+#endif // GTEST_HAS_ABSL
+
+namespace testing {
+
+using internal::CountIf;
+using internal::ForEach;
+using internal::GetElementOr;
+using internal::Shuffle;
+
+// Constants.
+
+// A test whose test suite name or test name matches this filter is
+// disabled and not run.
+static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*";
+
+// A test suite whose name matches this filter is considered a death
+// test suite and will be run before test suites whose name doesn't
+// match this filter.
+static const char kDeathTestSuiteFilter[] = "*DeathTest:*DeathTest/*";
+
+// A test filter that matches everything.
+static const char kUniversalFilter[] = "*";
+
+// The default output format.
+static const char kDefaultOutputFormat[] = "xml";
+// The default output file.
+static const char kDefaultOutputFile[] = "test_detail";
+
+// The environment variable name for the test shard index.
+static const char kTestShardIndex[] = "GTEST_SHARD_INDEX";
+// The environment variable name for the total number of test shards.
+static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS";
+// The environment variable name for the test shard status file.
+static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE";
+
+namespace internal {
+
+// The text used in failure messages to indicate the start of the
+// stack trace.
+const char kStackTraceMarker[] = "\nStack trace:\n";
+
+// g_help_flag is true if and only if the --help flag or an equivalent form
+// is specified on the command line.
+bool g_help_flag = false;
+
+// Utility function to Open File for Writing
+static FILE* OpenFileForWriting(const std::string& output_file) {
+ FILE* fileout = nullptr;
+ FilePath output_file_path(output_file);
+ FilePath output_dir(output_file_path.RemoveFileName());
+
+ if (output_dir.CreateDirectoriesRecursively()) {
+ fileout = posix::FOpen(output_file.c_str(), "w");
+ }
+ if (fileout == nullptr) {
+ GTEST_LOG_(FATAL) << "Unable to open file \"" << output_file << "\"";
+ }
+ return fileout;
+}
+
+} // namespace internal
+
+// Bazel passes in the argument to '--test_filter' via the TESTBRIDGE_TEST_ONLY
+// environment variable.
+static const char* GetDefaultFilter() {
+ const char* const testbridge_test_only =
+ internal::posix::GetEnv("TESTBRIDGE_TEST_ONLY");
+ if (testbridge_test_only != nullptr) {
+ return testbridge_test_only;
+ }
+ return kUniversalFilter;
+}
+
+// Bazel passes in the argument to '--test_runner_fail_fast' via the
+// TESTBRIDGE_TEST_RUNNER_FAIL_FAST environment variable.
+static bool GetDefaultFailFast() {
+ const char* const testbridge_test_runner_fail_fast =
+ internal::posix::GetEnv("TESTBRIDGE_TEST_RUNNER_FAIL_FAST");
+ if (testbridge_test_runner_fail_fast != nullptr) {
+ return strcmp(testbridge_test_runner_fail_fast, "1") == 0;
+ }
+ return false;
+}
+
+} // namespace testing
+
+GTEST_DEFINE_bool_(
+ fail_fast,
+ testing::internal::BoolFromGTestEnv("fail_fast",
+ testing::GetDefaultFailFast()),
+ "True if and only if a test failure should stop further test execution.");
+
+GTEST_DEFINE_bool_(
+ also_run_disabled_tests,
+ testing::internal::BoolFromGTestEnv("also_run_disabled_tests", false),
+ "Run disabled tests too, in addition to the tests normally being run.");
+
+GTEST_DEFINE_bool_(
+ break_on_failure,
+ testing::internal::BoolFromGTestEnv("break_on_failure", false),
+ "True if and only if a failed assertion should be a debugger "
+ "break-point.");
+
+GTEST_DEFINE_bool_(catch_exceptions,
+ testing::internal::BoolFromGTestEnv("catch_exceptions",
+ true),
+ "True if and only if " GTEST_NAME_
+ " should catch exceptions and treat them as test failures.");
+
+GTEST_DEFINE_string_(
+ color, testing::internal::StringFromGTestEnv("color", "auto"),
+ "Whether to use colors in the output. Valid values: yes, no, "
+ "and auto. 'auto' means to use colors if the output is "
+ "being sent to a terminal and the TERM environment variable "
+ "is set to a terminal type that supports colors.");
+
+GTEST_DEFINE_string_(
+ filter,
+ testing::internal::StringFromGTestEnv("filter",
+ testing::GetDefaultFilter()),
+ "A colon-separated list of glob (not regex) patterns "
+ "for filtering the tests to run, optionally followed by a "
+ "'-' and a : separated list of negative patterns (tests to "
+ "exclude). A test is run if it matches one of the positive "
+ "patterns and does not match any of the negative patterns.");
+
+GTEST_DEFINE_bool_(
+ install_failure_signal_handler,
+ testing::internal::BoolFromGTestEnv("install_failure_signal_handler",
+ false),
+ "If true and supported on the current platform, " GTEST_NAME_
+ " should "
+ "install a signal handler that dumps debugging information when fatal "
+ "signals are raised.");
+
+GTEST_DEFINE_bool_(list_tests, false, "List all tests without running them.");
+
+// The net priority order after flag processing is thus:
+// --gtest_output command line flag
+// GTEST_OUTPUT environment variable
+// XML_OUTPUT_FILE environment variable
+// ''
+GTEST_DEFINE_string_(
+ output,
+ testing::internal::StringFromGTestEnv(
+ "output", testing::internal::OutputFlagAlsoCheckEnvVar().c_str()),
+ "A format (defaults to \"xml\" but can be specified to be \"json\"), "
+ "optionally followed by a colon and an output file name or directory. "
+ "A directory is indicated by a trailing pathname separator. "
+ "Examples: \"xml:filename.xml\", \"xml::directoryname/\". "
+ "If a directory is specified, output files will be created "
+ "within that directory, with file-names based on the test "
+ "executable's name and, if necessary, made unique by adding "
+ "digits.");
+
+GTEST_DEFINE_bool_(
+ brief, testing::internal::BoolFromGTestEnv("brief", false),
+ "True if only test failures should be displayed in text output.");
+
+GTEST_DEFINE_bool_(print_time,
+ testing::internal::BoolFromGTestEnv("print_time", true),
+ "True if and only if " GTEST_NAME_
+ " should display elapsed time in text output.");
+
+GTEST_DEFINE_bool_(print_utf8,
+ testing::internal::BoolFromGTestEnv("print_utf8", true),
+ "True if and only if " GTEST_NAME_
+ " prints UTF8 characters as text.");
+
+GTEST_DEFINE_int32_(
+ random_seed, testing::internal::Int32FromGTestEnv("random_seed", 0),
+ "Random number seed to use when shuffling test orders. Must be in range "
+ "[1, 99999], or 0 to use a seed based on the current time.");
+
+GTEST_DEFINE_int32_(
+ repeat, testing::internal::Int32FromGTestEnv("repeat", 1),
+ "How many times to repeat each test. Specify a negative number "
+ "for repeating forever. Useful for shaking out flaky tests.");
+
+GTEST_DEFINE_bool_(
+ recreate_environments_when_repeating,
+ testing::internal::BoolFromGTestEnv("recreate_environments_when_repeating",
+ false),
+ "Controls whether global test environments are recreated for each repeat "
+ "of the tests. If set to false the global test environments are only set "
+ "up once, for the first iteration, and only torn down once, for the last. "
+ "Useful for shaking out flaky tests with stable, expensive test "
+ "environments. If --gtest_repeat is set to a negative number, meaning "
+ "there is no last run, the environments will always be recreated to avoid "
+ "leaks.");
+
+GTEST_DEFINE_bool_(show_internal_stack_frames, false,
+ "True if and only if " GTEST_NAME_
+ " should include internal stack frames when "
+ "printing test failure stack traces.");
+
+GTEST_DEFINE_bool_(shuffle,
+ testing::internal::BoolFromGTestEnv("shuffle", false),
+ "True if and only if " GTEST_NAME_
+ " should randomize tests' order on every run.");
+
+GTEST_DEFINE_int32_(
+ stack_trace_depth,
+ testing::internal::Int32FromGTestEnv("stack_trace_depth",
+ testing::kMaxStackTraceDepth),
+ "The maximum number of stack frames to print when an "
+ "assertion fails. The valid range is 0 through 100, inclusive.");
+
+GTEST_DEFINE_string_(
+ stream_result_to,
+ testing::internal::StringFromGTestEnv("stream_result_to", ""),
+ "This flag specifies the host name and the port number on which to stream "
+ "test results. Example: \"localhost:555\". The flag is effective only on "
+ "Linux.");
+
+GTEST_DEFINE_bool_(
+ throw_on_failure,
+ testing::internal::BoolFromGTestEnv("throw_on_failure", false),
+ "When this flag is specified, a failed assertion will throw an exception "
+ "if exceptions are enabled or exit the program with a non-zero code "
+ "otherwise. For use with an external test framework.");
+
+#if GTEST_USE_OWN_FLAGFILE_FLAG_
+GTEST_DEFINE_string_(
+ flagfile, testing::internal::StringFromGTestEnv("flagfile", ""),
+ "This flag specifies the flagfile to read command-line flags from.");
+#endif // GTEST_USE_OWN_FLAGFILE_FLAG_
+
+namespace testing {
+namespace internal {
+
+// Generates a random number from [0, range), using a Linear
+// Congruential Generator (LCG). Crashes if 'range' is 0 or greater
+// than kMaxRange.
+uint32_t Random::Generate(uint32_t range) {
+ // These constants are the same as are used in glibc's rand(3).
+ // Use wider types than necessary to prevent unsigned overflow diagnostics.
+ state_ = static_cast<uint32_t>(1103515245ULL * state_ + 12345U) % kMaxRange;
+
+ GTEST_CHECK_(range > 0) << "Cannot generate a number in the range [0, 0).";
+ GTEST_CHECK_(range <= kMaxRange)
+ << "Generation of a number in [0, " << range << ") was requested, "
+ << "but this can only generate numbers in [0, " << kMaxRange << ").";
+
+ // Converting via modulus introduces a bit of downward bias, but
+ // it's simple, and a linear congruential generator isn't too good
+ // to begin with.
+ return state_ % range;
+}
+
+// GTestIsInitialized() returns true if and only if the user has initialized
+// Google Test. Useful for catching the user mistake of not initializing
+// Google Test before calling RUN_ALL_TESTS().
+static bool GTestIsInitialized() { return GetArgvs().size() > 0; }
+
+// Iterates over a vector of TestSuites, keeping a running sum of the
+// results of calling a given int-returning method on each.
+// Returns the sum.
+static int SumOverTestSuiteList(const std::vector<TestSuite*>& case_list,
+ int (TestSuite::*method)() const) {
+ int sum = 0;
+ for (size_t i = 0; i < case_list.size(); i++) {
+ sum += (case_list[i]->*method)();
+ }
+ return sum;
+}
+
+// Returns true if and only if the test suite passed.
+static bool TestSuitePassed(const TestSuite* test_suite) {
+ return test_suite->should_run() && test_suite->Passed();
+}
+
+// Returns true if and only if the test suite failed.
+static bool TestSuiteFailed(const TestSuite* test_suite) {
+ return test_suite->should_run() && test_suite->Failed();
+}
+
+// Returns true if and only if test_suite contains at least one test that
+// should run.
+static bool ShouldRunTestSuite(const TestSuite* test_suite) {
+ return test_suite->should_run();
+}
+
+// AssertHelper constructor.
+AssertHelper::AssertHelper(TestPartResult::Type type, const char* file,
+ int line, const char* message)
+ : data_(new AssertHelperData(type, file, line, message)) {}
+
+AssertHelper::~AssertHelper() { delete data_; }
+
+// Message assignment, for assertion streaming support.
+void AssertHelper::operator=(const Message& message) const {
+ UnitTest::GetInstance()->AddTestPartResult(
+ data_->type, data_->file, data_->line,
+ AppendUserMessage(data_->message, message),
+ UnitTest::GetInstance()->impl()->CurrentOsStackTraceExceptTop(1)
+ // Skips the stack frame for this function itself.
+ ); // NOLINT
+}
+
+namespace {
+
+// When TEST_P is found without a matching INSTANTIATE_TEST_SUITE_P
+// to creates test cases for it, a synthetic test case is
+// inserted to report ether an error or a log message.
+//
+// This configuration bit will likely be removed at some point.
+constexpr bool kErrorOnUninstantiatedParameterizedTest = true;
+constexpr bool kErrorOnUninstantiatedTypeParameterizedTest = true;
+
+// A test that fails at a given file/line location with a given message.
+class FailureTest : public Test {
+ public:
+ explicit FailureTest(const CodeLocation& loc, std::string error_message,
+ bool as_error)
+ : loc_(loc),
+ error_message_(std::move(error_message)),
+ as_error_(as_error) {}
+
+ void TestBody() override {
+ if (as_error_) {
+ AssertHelper(TestPartResult::kNonFatalFailure, loc_.file.c_str(),
+ loc_.line, "") = Message() << error_message_;
+ } else {
+ std::cout << error_message_ << std::endl;
+ }
+ }
+
+ private:
+ const CodeLocation loc_;
+ const std::string error_message_;
+ const bool as_error_;
+};
+
+} // namespace
+
+std::set<std::string>* GetIgnoredParameterizedTestSuites() {
+ return UnitTest::GetInstance()->impl()->ignored_parameterized_test_suites();
+}
+
+// Add a given test_suit to the list of them allow to go un-instantiated.
+MarkAsIgnored::MarkAsIgnored(const char* test_suite) {
+ GetIgnoredParameterizedTestSuites()->insert(test_suite);
+}
+
+// If this parameterized test suite has no instantiations (and that
+// has not been marked as okay), emit a test case reporting that.
+void InsertSyntheticTestCase(const std::string& name, CodeLocation location,
+ bool has_test_p) {
+ const auto& ignored = *GetIgnoredParameterizedTestSuites();
+ if (ignored.find(name) != ignored.end()) return;
+
+ const char kMissingInstantiation[] = //
+ " is defined via TEST_P, but never instantiated. None of the test cases "
+ "will run. Either no INSTANTIATE_TEST_SUITE_P is provided or the only "
+ "ones provided expand to nothing."
+ "\n\n"
+ "Ideally, TEST_P definitions should only ever be included as part of "
+ "binaries that intend to use them. (As opposed to, for example, being "
+ "placed in a library that may be linked in to get other utilities.)";
+
+ const char kMissingTestCase[] = //
+ " is instantiated via INSTANTIATE_TEST_SUITE_P, but no tests are "
+ "defined via TEST_P . No test cases will run."
+ "\n\n"
+ "Ideally, INSTANTIATE_TEST_SUITE_P should only ever be invoked from "
+ "code that always depend on code that provides TEST_P. Failing to do "
+ "so is often an indication of dead code, e.g. the last TEST_P was "
+ "removed but the rest got left behind.";
+
+ std::string message =
+ "Parameterized test suite " + name +
+ (has_test_p ? kMissingInstantiation : kMissingTestCase) +
+ "\n\n"
+ "To suppress this error for this test suite, insert the following line "
+ "(in a non-header) in the namespace it is defined in:"
+ "\n\n"
+ "GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(" +
+ name + ");";
+
+ std::string full_name = "UninstantiatedParameterizedTestSuite<" + name + ">";
+ RegisterTest( //
+ "GoogleTestVerification", full_name.c_str(),
+ nullptr, // No type parameter.
+ nullptr, // No value parameter.
+ location.file.c_str(), location.line, [message, location] {
+ return new FailureTest(location, message,
+ kErrorOnUninstantiatedParameterizedTest);
+ });
+}
+
+void RegisterTypeParameterizedTestSuite(const char* test_suite_name,
+ CodeLocation code_location) {
+ GetUnitTestImpl()->type_parameterized_test_registry().RegisterTestSuite(
+ test_suite_name, code_location);
+}
+
+void RegisterTypeParameterizedTestSuiteInstantiation(const char* case_name) {
+ GetUnitTestImpl()->type_parameterized_test_registry().RegisterInstantiation(
+ case_name);
+}
+
+void TypeParameterizedTestSuiteRegistry::RegisterTestSuite(
+ const char* test_suite_name, CodeLocation code_location) {
+ suites_.emplace(std::string(test_suite_name),
+ TypeParameterizedTestSuiteInfo(code_location));
+}
+
+void TypeParameterizedTestSuiteRegistry::RegisterInstantiation(
+ const char* test_suite_name) {
+ auto it = suites_.find(std::string(test_suite_name));
+ if (it != suites_.end()) {
+ it->second.instantiated = true;
+ } else {
+ GTEST_LOG_(ERROR) << "Unknown type parameterized test suit '"
+ << test_suite_name << "'";
+ }
+}
+
+void TypeParameterizedTestSuiteRegistry::CheckForInstantiations() {
+ const auto& ignored = *GetIgnoredParameterizedTestSuites();
+ for (const auto& testcase : suites_) {
+ if (testcase.second.instantiated) continue;
+ if (ignored.find(testcase.first) != ignored.end()) continue;
+
+ std::string message =
+ "Type parameterized test suite " + testcase.first +
+ " is defined via REGISTER_TYPED_TEST_SUITE_P, but never instantiated "
+ "via INSTANTIATE_TYPED_TEST_SUITE_P. None of the test cases will run."
+ "\n\n"
+ "Ideally, TYPED_TEST_P definitions should only ever be included as "
+ "part of binaries that intend to use them. (As opposed to, for "
+ "example, being placed in a library that may be linked in to get other "
+ "utilities.)"
+ "\n\n"
+ "To suppress this error for this test suite, insert the following line "
+ "(in a non-header) in the namespace it is defined in:"
+ "\n\n"
+ "GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(" +
+ testcase.first + ");";
+
+ std::string full_name =
+ "UninstantiatedTypeParameterizedTestSuite<" + testcase.first + ">";
+ RegisterTest( //
+ "GoogleTestVerification", full_name.c_str(),
+ nullptr, // No type parameter.
+ nullptr, // No value parameter.
+ testcase.second.code_location.file.c_str(),
+ testcase.second.code_location.line, [message, testcase] {
+ return new FailureTest(testcase.second.code_location, message,
+ kErrorOnUninstantiatedTypeParameterizedTest);
+ });
+ }
+}
+
+// A copy of all command line arguments. Set by InitGoogleTest().
+static ::std::vector<std::string> g_argvs;
+
+::std::vector<std::string> GetArgvs() {
+#if defined(GTEST_CUSTOM_GET_ARGVS_)
+ // GTEST_CUSTOM_GET_ARGVS_() may return a container of std::string or
+ // ::string. This code converts it to the appropriate type.
+ const auto& custom = GTEST_CUSTOM_GET_ARGVS_();
+ return ::std::vector<std::string>(custom.begin(), custom.end());
+#else // defined(GTEST_CUSTOM_GET_ARGVS_)
+ return g_argvs;
+#endif // defined(GTEST_CUSTOM_GET_ARGVS_)
+}
+
+// Returns the current application's name, removing directory path if that
+// is present.
+FilePath GetCurrentExecutableName() {
+ FilePath result;
+
+#if GTEST_OS_WINDOWS || GTEST_OS_OS2
+ result.Set(FilePath(GetArgvs()[0]).RemoveExtension("exe"));
+#else
+ result.Set(FilePath(GetArgvs()[0]));
+#endif // GTEST_OS_WINDOWS
+
+ return result.RemoveDirectoryName();
+}
+
+// Functions for processing the gtest_output flag.
+
+// Returns the output format, or "" for normal printed output.
+std::string UnitTestOptions::GetOutputFormat() {
+ std::string s = GTEST_FLAG_GET(output);
+ const char* const gtest_output_flag = s.c_str();
+ const char* const colon = strchr(gtest_output_flag, ':');
+ return (colon == nullptr)
+ ? std::string(gtest_output_flag)
+ : std::string(gtest_output_flag,
+ static_cast<size_t>(colon - gtest_output_flag));
+}
+
+// Returns the name of the requested output file, or the default if none
+// was explicitly specified.
+std::string UnitTestOptions::GetAbsolutePathToOutputFile() {
+ std::string s = GTEST_FLAG_GET(output);
+ const char* const gtest_output_flag = s.c_str();
+
+ std::string format = GetOutputFormat();
+ if (format.empty()) format = std::string(kDefaultOutputFormat);
+
+ const char* const colon = strchr(gtest_output_flag, ':');
+ if (colon == nullptr)
+ return internal::FilePath::MakeFileName(
+ internal::FilePath(
+ UnitTest::GetInstance()->original_working_dir()),
+ internal::FilePath(kDefaultOutputFile), 0, format.c_str())
+ .string();
+
+ internal::FilePath output_name(colon + 1);
+ if (!output_name.IsAbsolutePath())
+ output_name = internal::FilePath::ConcatPaths(
+ internal::FilePath(UnitTest::GetInstance()->original_working_dir()),
+ internal::FilePath(colon + 1));
+
+ if (!output_name.IsDirectory()) return output_name.string();
+
+ internal::FilePath result(internal::FilePath::GenerateUniqueFileName(
+ output_name, internal::GetCurrentExecutableName(),
+ GetOutputFormat().c_str()));
+ return result.string();
+}
+
+// Returns true if and only if the wildcard pattern matches the string. Each
+// pattern consists of regular characters, single-character wildcards (?), and
+// multi-character wildcards (*).
+//
+// This function implements a linear-time string globbing algorithm based on
+// https://research.swtch.com/glob.
+static bool PatternMatchesString(const std::string& name_str,
+ const char* pattern, const char* pattern_end) {
+ const char* name = name_str.c_str();
+ const char* const name_begin = name;
+ const char* const name_end = name + name_str.size();
+
+ const char* pattern_next = pattern;
+ const char* name_next = name;
+
+ while (pattern < pattern_end || name < name_end) {
+ if (pattern < pattern_end) {
+ switch (*pattern) {
+ default: // Match an ordinary character.
+ if (name < name_end && *name == *pattern) {
+ ++pattern;
+ ++name;
+ continue;
+ }
+ break;
+ case '?': // Match any single character.
+ if (name < name_end) {
+ ++pattern;
+ ++name;
+ continue;
+ }
+ break;
+ case '*':
+ // Match zero or more characters. Start by skipping over the wildcard
+ // and matching zero characters from name. If that fails, restart and
+ // match one more character than the last attempt.
+ pattern_next = pattern;
+ name_next = name + 1;
+ ++pattern;
+ continue;
+ }
+ }
+ // Failed to match a character. Restart if possible.
+ if (name_begin < name_next && name_next <= name_end) {
+ pattern = pattern_next;
+ name = name_next;
+ continue;
+ }
+ return false;
+ }
+ return true;
+}
+
+namespace {
+
+bool IsGlobPattern(const std::string& pattern) {
+ return std::any_of(pattern.begin(), pattern.end(),
+ [](const char c) { return c == '?' || c == '*'; });
+}
+
+class UnitTestFilter {
+ public:
+ UnitTestFilter() = default;
+
+ // Constructs a filter from a string of patterns separated by `:`.
+ explicit UnitTestFilter(const std::string& filter) {
+ // By design "" filter matches "" string.
+ std::vector<std::string> all_patterns;
+ SplitString(filter, ':', &all_patterns);
+ const auto exact_match_patterns_begin = std::partition(
+ all_patterns.begin(), all_patterns.end(), &IsGlobPattern);
+
+ glob_patterns_.reserve(static_cast<size_t>(
+ std::distance(all_patterns.begin(), exact_match_patterns_begin)));
+ std::move(all_patterns.begin(), exact_match_patterns_begin,
+ std::inserter(glob_patterns_, glob_patterns_.begin()));
+ std::move(
+ exact_match_patterns_begin, all_patterns.end(),
+ std::inserter(exact_match_patterns_, exact_match_patterns_.begin()));
+ }
+
+ // Returns true if and only if name matches at least one of the patterns in
+ // the filter.
+ bool MatchesName(const std::string& name) const {
+ return exact_match_patterns_.count(name) > 0 ||
+ std::any_of(glob_patterns_.begin(), glob_patterns_.end(),
+ [&name](const std::string& pattern) {
+ return PatternMatchesString(
+ name, pattern.c_str(),
+ pattern.c_str() + pattern.size());
+ });
+ }
+
+ private:
+ std::vector<std::string> glob_patterns_;
+ std::unordered_set<std::string> exact_match_patterns_;
+};
+
+class PositiveAndNegativeUnitTestFilter {
+ public:
+ // Constructs a positive and a negative filter from a string. The string
+ // contains a positive filter optionally followed by a '-' character and a
+ // negative filter. In case only a negative filter is provided the positive
+ // filter will be assumed "*".
+ // A filter is a list of patterns separated by ':'.
+ explicit PositiveAndNegativeUnitTestFilter(const std::string& filter) {
+ std::vector<std::string> positive_and_negative_filters;
+
+ // NOTE: `SplitString` always returns a non-empty container.
+ SplitString(filter, '-', &positive_and_negative_filters);
+ const auto& positive_filter = positive_and_negative_filters.front();
+
+ if (positive_and_negative_filters.size() > 1) {
+ positive_filter_ = UnitTestFilter(
+ positive_filter.empty() ? kUniversalFilter : positive_filter);
+
+ // TODO(b/214626361): Fail on multiple '-' characters
+ // For the moment to preserve old behavior we concatenate the rest of the
+ // string parts with `-` as separator to generate the negative filter.
+ auto negative_filter_string = positive_and_negative_filters[1];
+ for (std::size_t i = 2; i < positive_and_negative_filters.size(); i++)
+ negative_filter_string =
+ negative_filter_string + '-' + positive_and_negative_filters[i];
+ negative_filter_ = UnitTestFilter(negative_filter_string);
+ } else {
+ // In case we don't have a negative filter and positive filter is ""
+ // we do not use kUniversalFilter by design as opposed to when we have a
+ // negative filter.
+ positive_filter_ = UnitTestFilter(positive_filter);
+ }
+ }
+
+ // Returns true if and only if test name (this is generated by appending test
+ // suit name and test name via a '.' character) matches the positive filter
+ // and does not match the negative filter.
+ bool MatchesTest(const std::string& test_suite_name,
+ const std::string& test_name) const {
+ return MatchesName(test_suite_name + "." + test_name);
+ }
+
+ // Returns true if and only if name matches the positive filter and does not
+ // match the negative filter.
+ bool MatchesName(const std::string& name) const {
+ return positive_filter_.MatchesName(name) &&
+ !negative_filter_.MatchesName(name);
+ }
+
+ private:
+ UnitTestFilter positive_filter_;
+ UnitTestFilter negative_filter_;
+};
+} // namespace
+
+bool UnitTestOptions::MatchesFilter(const std::string& name_str,
+ const char* filter) {
+ return UnitTestFilter(filter).MatchesName(name_str);
+}
+
+// Returns true if and only if the user-specified filter matches the test
+// suite name and the test name.
+bool UnitTestOptions::FilterMatchesTest(const std::string& test_suite_name,
+ const std::string& test_name) {
+ // Split --gtest_filter at '-', if there is one, to separate into
+ // positive filter and negative filter portions
+ return PositiveAndNegativeUnitTestFilter(GTEST_FLAG_GET(filter))
+ .MatchesTest(test_suite_name, test_name);
+}
+
+#if GTEST_HAS_SEH
+// Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the
+// given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise.
+// This function is useful as an __except condition.
+int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) {
+ // Google Test should handle a SEH exception if:
+ // 1. the user wants it to, AND
+ // 2. this is not a breakpoint exception, AND
+ // 3. this is not a C++ exception (VC++ implements them via SEH,
+ // apparently).
+ //
+ // SEH exception code for C++ exceptions.
+ // (see http://support.microsoft.com/kb/185294 for more information).
+ const DWORD kCxxExceptionCode = 0xe06d7363;
+
+ bool should_handle = true;
+
+ if (!GTEST_FLAG_GET(catch_exceptions))
+ should_handle = false;
+ else if (exception_code == EXCEPTION_BREAKPOINT)
+ should_handle = false;
+ else if (exception_code == kCxxExceptionCode)
+ should_handle = false;
+
+ return should_handle ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH;
+}
+#endif // GTEST_HAS_SEH
+
+} // namespace internal
+
+// The c'tor sets this object as the test part result reporter used by
+// Google Test. The 'result' parameter specifies where to report the
+// results. Intercepts only failures from the current thread.
+ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
+ TestPartResultArray* result)
+ : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD), result_(result) {
+ Init();
+}
+
+// The c'tor sets this object as the test part result reporter used by
+// Google Test. The 'result' parameter specifies where to report the
+// results.
+ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
+ InterceptMode intercept_mode, TestPartResultArray* result)
+ : intercept_mode_(intercept_mode), result_(result) {
+ Init();
+}
+
+void ScopedFakeTestPartResultReporter::Init() {
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ if (intercept_mode_ == INTERCEPT_ALL_THREADS) {
+ old_reporter_ = impl->GetGlobalTestPartResultReporter();
+ impl->SetGlobalTestPartResultReporter(this);
+ } else {
+ old_reporter_ = impl->GetTestPartResultReporterForCurrentThread();
+ impl->SetTestPartResultReporterForCurrentThread(this);
+ }
+}
+
+// The d'tor restores the test part result reporter used by Google Test
+// before.
+ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() {
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ if (intercept_mode_ == INTERCEPT_ALL_THREADS) {
+ impl->SetGlobalTestPartResultReporter(old_reporter_);
+ } else {
+ impl->SetTestPartResultReporterForCurrentThread(old_reporter_);
+ }
+}
+
+// Increments the test part result count and remembers the result.
+// This method is from the TestPartResultReporterInterface interface.
+void ScopedFakeTestPartResultReporter::ReportTestPartResult(
+ const TestPartResult& result) {
+ result_->Append(result);
+}
+
+namespace internal {
+
+// Returns the type ID of ::testing::Test. We should always call this
+// instead of GetTypeId< ::testing::Test>() to get the type ID of
+// testing::Test. This is to work around a suspected linker bug when
+// using Google Test as a framework on Mac OS X. The bug causes
+// GetTypeId< ::testing::Test>() to return different values depending
+// on whether the call is from the Google Test framework itself or
+// from user test code. GetTestTypeId() is guaranteed to always
+// return the same value, as it always calls GetTypeId<>() from the
+// gtest.cc, which is within the Google Test framework.
+TypeId GetTestTypeId() { return GetTypeId<Test>(); }
+
+// The value of GetTestTypeId() as seen from within the Google Test
+// library. This is solely for testing GetTestTypeId().
+extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId();
+
+// This predicate-formatter checks that 'results' contains a test part
+// failure of the given type and that the failure message contains the
+// given substring.
+static AssertionResult HasOneFailure(const char* /* results_expr */,
+ const char* /* type_expr */,
+ const char* /* substr_expr */,
+ const TestPartResultArray& results,
+ TestPartResult::Type type,
+ const std::string& substr) {
+ const std::string expected(type == TestPartResult::kFatalFailure
+ ? "1 fatal failure"
+ : "1 non-fatal failure");
+ Message msg;
+ if (results.size() != 1) {
+ msg << "Expected: " << expected << "\n"
+ << " Actual: " << results.size() << " failures";
+ for (int i = 0; i < results.size(); i++) {
+ msg << "\n" << results.GetTestPartResult(i);
+ }
+ return AssertionFailure() << msg;
+ }
+
+ const TestPartResult& r = results.GetTestPartResult(0);
+ if (r.type() != type) {
+ return AssertionFailure() << "Expected: " << expected << "\n"
+ << " Actual:\n"
+ << r;
+ }
+
+ if (strstr(r.message(), substr.c_str()) == nullptr) {
+ return AssertionFailure()
+ << "Expected: " << expected << " containing \"" << substr << "\"\n"
+ << " Actual:\n"
+ << r;
+ }
+
+ return AssertionSuccess();
+}
+
+// The constructor of SingleFailureChecker remembers where to look up
+// test part results, what type of failure we expect, and what
+// substring the failure message should contain.
+SingleFailureChecker::SingleFailureChecker(const TestPartResultArray* results,
+ TestPartResult::Type type,
+ const std::string& substr)
+ : results_(results), type_(type), substr_(substr) {}
+
+// The destructor of SingleFailureChecker verifies that the given
+// TestPartResultArray contains exactly one failure that has the given
+// type and contains the given substring. If that's not the case, a
+// non-fatal failure will be generated.
+SingleFailureChecker::~SingleFailureChecker() {
+ EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_);
+}
+
+DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter(
+ UnitTestImpl* unit_test)
+ : unit_test_(unit_test) {}
+
+void DefaultGlobalTestPartResultReporter::ReportTestPartResult(
+ const TestPartResult& result) {
+ unit_test_->current_test_result()->AddTestPartResult(result);
+ unit_test_->listeners()->repeater()->OnTestPartResult(result);
+}
+
+DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter(
+ UnitTestImpl* unit_test)
+ : unit_test_(unit_test) {}
+
+void DefaultPerThreadTestPartResultReporter::ReportTestPartResult(
+ const TestPartResult& result) {
+ unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result);
+}
+
+// Returns the global test part result reporter.
+TestPartResultReporterInterface*
+UnitTestImpl::GetGlobalTestPartResultReporter() {
+ internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
+ return global_test_part_result_repoter_;
+}
+
+// Sets the global test part result reporter.
+void UnitTestImpl::SetGlobalTestPartResultReporter(
+ TestPartResultReporterInterface* reporter) {
+ internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
+ global_test_part_result_repoter_ = reporter;
+}
+
+// Returns the test part result reporter for the current thread.
+TestPartResultReporterInterface*
+UnitTestImpl::GetTestPartResultReporterForCurrentThread() {
+ return per_thread_test_part_result_reporter_.get();
+}
+
+// Sets the test part result reporter for the current thread.
+void UnitTestImpl::SetTestPartResultReporterForCurrentThread(
+ TestPartResultReporterInterface* reporter) {
+ per_thread_test_part_result_reporter_.set(reporter);
+}
+
+// Gets the number of successful test suites.
+int UnitTestImpl::successful_test_suite_count() const {
+ return CountIf(test_suites_, TestSuitePassed);
+}
+
+// Gets the number of failed test suites.
+int UnitTestImpl::failed_test_suite_count() const {
+ return CountIf(test_suites_, TestSuiteFailed);
+}
+
+// Gets the number of all test suites.
+int UnitTestImpl::total_test_suite_count() const {
+ return static_cast<int>(test_suites_.size());
+}
+
+// Gets the number of all test suites that contain at least one test
+// that should run.
+int UnitTestImpl::test_suite_to_run_count() const {
+ return CountIf(test_suites_, ShouldRunTestSuite);
+}
+
+// Gets the number of successful tests.
+int UnitTestImpl::successful_test_count() const {
+ return SumOverTestSuiteList(test_suites_, &TestSuite::successful_test_count);
+}
+
+// Gets the number of skipped tests.
+int UnitTestImpl::skipped_test_count() const {
+ return SumOverTestSuiteList(test_suites_, &TestSuite::skipped_test_count);
+}
+
+// Gets the number of failed tests.
+int UnitTestImpl::failed_test_count() const {
+ return SumOverTestSuiteList(test_suites_, &TestSuite::failed_test_count);
+}
+
+// Gets the number of disabled tests that will be reported in the XML report.
+int UnitTestImpl::reportable_disabled_test_count() const {
+ return SumOverTestSuiteList(test_suites_,
+ &TestSuite::reportable_disabled_test_count);
+}
+
+// Gets the number of disabled tests.
+int UnitTestImpl::disabled_test_count() const {
+ return SumOverTestSuiteList(test_suites_, &TestSuite::disabled_test_count);
+}
+
+// Gets the number of tests to be printed in the XML report.
+int UnitTestImpl::reportable_test_count() const {
+ return SumOverTestSuiteList(test_suites_, &TestSuite::reportable_test_count);
+}
+
+// Gets the number of all tests.
+int UnitTestImpl::total_test_count() const {
+ return SumOverTestSuiteList(test_suites_, &TestSuite::total_test_count);
+}
+
+// Gets the number of tests that should run.
+int UnitTestImpl::test_to_run_count() const {
+ return SumOverTestSuiteList(test_suites_, &TestSuite::test_to_run_count);
+}
+
+// Returns the current OS stack trace as an std::string.
+//
+// The maximum number of stack frames to be included is specified by
+// the gtest_stack_trace_depth flag. The skip_count parameter
+// specifies the number of top frames to be skipped, which doesn't
+// count against the number of frames to be included.
+//
+// For example, if Foo() calls Bar(), which in turn calls
+// CurrentOsStackTraceExceptTop(1), Foo() will be included in the
+// trace but Bar() and CurrentOsStackTraceExceptTop() won't.
+std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) {
+ return os_stack_trace_getter()->CurrentStackTrace(
+ static_cast<int>(GTEST_FLAG_GET(stack_trace_depth)), skip_count + 1
+ // Skips the user-specified number of frames plus this function
+ // itself.
+ ); // NOLINT
+}
+
+// A helper class for measuring elapsed times.
+class Timer {
+ public:
+ Timer() : start_(std::chrono::steady_clock::now()) {}
+
+ // Return time elapsed in milliseconds since the timer was created.
+ TimeInMillis Elapsed() {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - start_)
+ .count();
+ }
+
+ private:
+ std::chrono::steady_clock::time_point start_;
+};
+
+// Returns a timestamp as milliseconds since the epoch. Note this time may jump
+// around subject to adjustments by the system, to measure elapsed time use
+// Timer instead.
+TimeInMillis GetTimeInMillis() {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::system_clock::now() -
+ std::chrono::system_clock::from_time_t(0))
+ .count();
+}
+
+// Utilities
+
+// class String.
+
+#if GTEST_OS_WINDOWS_MOBILE
+// Creates a UTF-16 wide string from the given ANSI string, allocating
+// memory using new. The caller is responsible for deleting the return
+// value using delete[]. Returns the wide string, or NULL if the
+// input is NULL.
+LPCWSTR String::AnsiToUtf16(const char* ansi) {
+ if (!ansi) return nullptr;
+ const int length = strlen(ansi);
+ const int unicode_length =
+ MultiByteToWideChar(CP_ACP, 0, ansi, length, nullptr, 0);
+ WCHAR* unicode = new WCHAR[unicode_length + 1];
+ MultiByteToWideChar(CP_ACP, 0, ansi, length, unicode, unicode_length);
+ unicode[unicode_length] = 0;
+ return unicode;
+}
+
+// Creates an ANSI string from the given wide string, allocating
+// memory using new. The caller is responsible for deleting the return
+// value using delete[]. Returns the ANSI string, or NULL if the
+// input is NULL.
+const char* String::Utf16ToAnsi(LPCWSTR utf16_str) {
+ if (!utf16_str) return nullptr;
+ const int ansi_length = WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, nullptr,
+ 0, nullptr, nullptr);
+ char* ansi = new char[ansi_length + 1];
+ WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, ansi, ansi_length, nullptr,
+ nullptr);
+ ansi[ansi_length] = 0;
+ return ansi;
+}
+
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+// Compares two C strings. Returns true if and only if they have the same
+// content.
+//
+// Unlike strcmp(), this function can handle NULL argument(s). A NULL
+// C string is considered different to any non-NULL C string,
+// including the empty string.
+bool String::CStringEquals(const char* lhs, const char* rhs) {
+ if (lhs == nullptr) return rhs == nullptr;
+
+ if (rhs == nullptr) return false;
+
+ return strcmp(lhs, rhs) == 0;
+}
+
+#if GTEST_HAS_STD_WSTRING
+
+// Converts an array of wide chars to a narrow string using the UTF-8
+// encoding, and streams the result to the given Message object.
+static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length,
+ Message* msg) {
+ for (size_t i = 0; i != length;) { // NOLINT
+ if (wstr[i] != L'\0') {
+ *msg << WideStringToUtf8(wstr + i, static_cast<int>(length - i));
+ while (i != length && wstr[i] != L'\0') i++;
+ } else {
+ *msg << '\0';
+ i++;
+ }
+ }
+}
+
+#endif // GTEST_HAS_STD_WSTRING
+
+void SplitString(const ::std::string& str, char delimiter,
+ ::std::vector< ::std::string>* dest) {
+ ::std::vector< ::std::string> parsed;
+ ::std::string::size_type pos = 0;
+ while (::testing::internal::AlwaysTrue()) {
+ const ::std::string::size_type colon = str.find(delimiter, pos);
+ if (colon == ::std::string::npos) {
+ parsed.push_back(str.substr(pos));
+ break;
+ } else {
+ parsed.push_back(str.substr(pos, colon - pos));
+ pos = colon + 1;
+ }
+ }
+ dest->swap(parsed);
+}
+
+} // namespace internal
+
+// Constructs an empty Message.
+// We allocate the stringstream separately because otherwise each use of
+// ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's
+// stack frame leading to huge stack frames in some cases; gcc does not reuse
+// the stack space.
+Message::Message() : ss_(new ::std::stringstream) {
+ // By default, we want there to be enough precision when printing
+ // a double to a Message.
+ *ss_ << std::setprecision(std::numeric_limits<double>::digits10 + 2);
+}
+
+// These two overloads allow streaming a wide C string to a Message
+// using the UTF-8 encoding.
+Message& Message::operator<<(const wchar_t* wide_c_str) {
+ return *this << internal::String::ShowWideCString(wide_c_str);
+}
+Message& Message::operator<<(wchar_t* wide_c_str) {
+ return *this << internal::String::ShowWideCString(wide_c_str);
+}
+
+#if GTEST_HAS_STD_WSTRING
+// Converts the given wide string to a narrow string using the UTF-8
+// encoding, and streams the result to this Message object.
+Message& Message::operator<<(const ::std::wstring& wstr) {
+ internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this);
+ return *this;
+}
+#endif // GTEST_HAS_STD_WSTRING
+
+// Gets the text streamed to this object so far as an std::string.
+// Each '\0' character in the buffer is replaced with "\\0".
+std::string Message::GetString() const {
+ return internal::StringStreamToString(ss_.get());
+}
+
+namespace internal {
+
+namespace edit_distance {
+std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
+ const std::vector<size_t>& right) {
+ std::vector<std::vector<double> > costs(
+ left.size() + 1, std::vector<double>(right.size() + 1));
+ std::vector<std::vector<EditType> > best_move(
+ left.size() + 1, std::vector<EditType>(right.size() + 1));
+
+ // Populate for empty right.
+ for (size_t l_i = 0; l_i < costs.size(); ++l_i) {
+ costs[l_i][0] = static_cast<double>(l_i);
+ best_move[l_i][0] = kRemove;
+ }
+ // Populate for empty left.
+ for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) {
+ costs[0][r_i] = static_cast<double>(r_i);
+ best_move[0][r_i] = kAdd;
+ }
+
+ for (size_t l_i = 0; l_i < left.size(); ++l_i) {
+ for (size_t r_i = 0; r_i < right.size(); ++r_i) {
+ if (left[l_i] == right[r_i]) {
+ // Found a match. Consume it.
+ costs[l_i + 1][r_i + 1] = costs[l_i][r_i];
+ best_move[l_i + 1][r_i + 1] = kMatch;
+ continue;
+ }
+
+ const double add = costs[l_i + 1][r_i];
+ const double remove = costs[l_i][r_i + 1];
+ const double replace = costs[l_i][r_i];
+ if (add < remove && add < replace) {
+ costs[l_i + 1][r_i + 1] = add + 1;
+ best_move[l_i + 1][r_i + 1] = kAdd;
+ } else if (remove < add && remove < replace) {
+ costs[l_i + 1][r_i + 1] = remove + 1;
+ best_move[l_i + 1][r_i + 1] = kRemove;
+ } else {
+ // We make replace a little more expensive than add/remove to lower
+ // their priority.
+ costs[l_i + 1][r_i + 1] = replace + 1.00001;
+ best_move[l_i + 1][r_i + 1] = kReplace;
+ }
+ }
+ }
+
+ // Reconstruct the best path. We do it in reverse order.
+ std::vector<EditType> best_path;
+ for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) {
+ EditType move = best_move[l_i][r_i];
+ best_path.push_back(move);
+ l_i -= move != kAdd;
+ r_i -= move != kRemove;
+ }
+ std::reverse(best_path.begin(), best_path.end());
+ return best_path;
+}
+
+namespace {
+
+// Helper class to convert string into ids with deduplication.
+class InternalStrings {
+ public:
+ size_t GetId(const std::string& str) {
+ IdMap::iterator it = ids_.find(str);
+ if (it != ids_.end()) return it->second;
+ size_t id = ids_.size();
+ return ids_[str] = id;
+ }
+
+ private:
+ typedef std::map<std::string, size_t> IdMap;
+ IdMap ids_;
+};
+
+} // namespace
+
+std::vector<EditType> CalculateOptimalEdits(
+ const std::vector<std::string>& left,
+ const std::vector<std::string>& right) {
+ std::vector<size_t> left_ids, right_ids;
+ {
+ InternalStrings intern_table;
+ for (size_t i = 0; i < left.size(); ++i) {
+ left_ids.push_back(intern_table.GetId(left[i]));
+ }
+ for (size_t i = 0; i < right.size(); ++i) {
+ right_ids.push_back(intern_table.GetId(right[i]));
+ }
+ }
+ return CalculateOptimalEdits(left_ids, right_ids);
+}
+
+namespace {
+
+// Helper class that holds the state for one hunk and prints it out to the
+// stream.
+// It reorders adds/removes when possible to group all removes before all
+// adds. It also adds the hunk header before printint into the stream.
+class Hunk {
+ public:
+ Hunk(size_t left_start, size_t right_start)
+ : left_start_(left_start),
+ right_start_(right_start),
+ adds_(),
+ removes_(),
+ common_() {}
+
+ void PushLine(char edit, const char* line) {
+ switch (edit) {
+ case ' ':
+ ++common_;
+ FlushEdits();
+ hunk_.push_back(std::make_pair(' ', line));
+ break;
+ case '-':
+ ++removes_;
+ hunk_removes_.push_back(std::make_pair('-', line));
+ break;
+ case '+':
+ ++adds_;
+ hunk_adds_.push_back(std::make_pair('+', line));
+ break;
+ }
+ }
+
+ void PrintTo(std::ostream* os) {
+ PrintHeader(os);
+ FlushEdits();
+ for (std::list<std::pair<char, const char*> >::const_iterator it =
+ hunk_.begin();
+ it != hunk_.end(); ++it) {
+ *os << it->first << it->second << "\n";
+ }
+ }
+
+ bool has_edits() const { return adds_ || removes_; }
+
+ private:
+ void FlushEdits() {
+ hunk_.splice(hunk_.end(), hunk_removes_);
+ hunk_.splice(hunk_.end(), hunk_adds_);
+ }
+
+ // Print a unified diff header for one hunk.
+ // The format is
+ // "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
+ // where the left/right parts are omitted if unnecessary.
+ void PrintHeader(std::ostream* ss) const {
+ *ss << "@@ ";
+ if (removes_) {
+ *ss << "-" << left_start_ << "," << (removes_ + common_);
+ }
+ if (removes_ && adds_) {
+ *ss << " ";
+ }
+ if (adds_) {
+ *ss << "+" << right_start_ << "," << (adds_ + common_);
+ }
+ *ss << " @@\n";
+ }
+
+ size_t left_start_, right_start_;
+ size_t adds_, removes_, common_;
+ std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
+};
+
+} // namespace
+
+// Create a list of diff hunks in Unified diff format.
+// Each hunk has a header generated by PrintHeader above plus a body with
+// lines prefixed with ' ' for no change, '-' for deletion and '+' for
+// addition.
+// 'context' represents the desired unchanged prefix/suffix around the diff.
+// If two hunks are close enough that their contexts overlap, then they are
+// joined into one hunk.
+std::string CreateUnifiedDiff(const std::vector<std::string>& left,
+ const std::vector<std::string>& right,
+ size_t context) {
+ const std::vector<EditType> edits = CalculateOptimalEdits(left, right);
+
+ size_t l_i = 0, r_i = 0, edit_i = 0;
+ std::stringstream ss;
+ while (edit_i < edits.size()) {
+ // Find first edit.
+ while (edit_i < edits.size() && edits[edit_i] == kMatch) {
+ ++l_i;
+ ++r_i;
+ ++edit_i;
+ }
+
+ // Find the first line to include in the hunk.
+ const size_t prefix_context = std::min(l_i, context);
+ Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
+ for (size_t i = prefix_context; i > 0; --i) {
+ hunk.PushLine(' ', left[l_i - i].c_str());
+ }
+
+ // Iterate the edits until we found enough suffix for the hunk or the input
+ // is over.
+ size_t n_suffix = 0;
+ for (; edit_i < edits.size(); ++edit_i) {
+ if (n_suffix >= context) {
+ // Continue only if the next hunk is very close.
+ auto it = edits.begin() + static_cast<int>(edit_i);
+ while (it != edits.end() && *it == kMatch) ++it;
+ if (it == edits.end() ||
+ static_cast<size_t>(it - edits.begin()) - edit_i >= context) {
+ // There is no next edit or it is too far away.
+ break;
+ }
+ }
+
+ EditType edit = edits[edit_i];
+ // Reset count when a non match is found.
+ n_suffix = edit == kMatch ? n_suffix + 1 : 0;
+
+ if (edit == kMatch || edit == kRemove || edit == kReplace) {
+ hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str());
+ }
+ if (edit == kAdd || edit == kReplace) {
+ hunk.PushLine('+', right[r_i].c_str());
+ }
+
+ // Advance indices, depending on edit type.
+ l_i += edit != kAdd;
+ r_i += edit != kRemove;
+ }
+
+ if (!hunk.has_edits()) {
+ // We are done. We don't want this hunk.
+ break;
+ }
+
+ hunk.PrintTo(&ss);
+ }
+ return ss.str();
+}
+
+} // namespace edit_distance
+
+namespace {
+
+// The string representation of the values received in EqFailure() are already
+// escaped. Split them on escaped '\n' boundaries. Leave all other escaped
+// characters the same.
+std::vector<std::string> SplitEscapedString(const std::string& str) {
+ std::vector<std::string> lines;
+ size_t start = 0, end = str.size();
+ if (end > 2 && str[0] == '"' && str[end - 1] == '"') {
+ ++start;
+ --end;
+ }
+ bool escaped = false;
+ for (size_t i = start; i + 1 < end; ++i) {
+ if (escaped) {
+ escaped = false;
+ if (str[i] == 'n') {
+ lines.push_back(str.substr(start, i - start - 1));
+ start = i + 1;
+ }
+ } else {
+ escaped = str[i] == '\\';
+ }
+ }
+ lines.push_back(str.substr(start, end - start));
+ return lines;
+}
+
+} // namespace
+
+// Constructs and returns the message for an equality assertion
+// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
+//
+// The first four parameters are the expressions used in the assertion
+// and their values, as strings. For example, for ASSERT_EQ(foo, bar)
+// where foo is 5 and bar is 6, we have:
+//
+// lhs_expression: "foo"
+// rhs_expression: "bar"
+// lhs_value: "5"
+// rhs_value: "6"
+//
+// The ignoring_case parameter is true if and only if the assertion is a
+// *_STRCASEEQ*. When it's true, the string "Ignoring case" will
+// be inserted into the message.
+AssertionResult EqFailure(const char* lhs_expression,
+ const char* rhs_expression,
+ const std::string& lhs_value,
+ const std::string& rhs_value, bool ignoring_case) {
+ Message msg;
+ msg << "Expected equality of these values:";
+ msg << "\n " << lhs_expression;
+ if (lhs_value != lhs_expression) {
+ msg << "\n Which is: " << lhs_value;
+ }
+ msg << "\n " << rhs_expression;
+ if (rhs_value != rhs_expression) {
+ msg << "\n Which is: " << rhs_value;
+ }
+
+ if (ignoring_case) {
+ msg << "\nIgnoring case";
+ }
+
+ if (!lhs_value.empty() && !rhs_value.empty()) {
+ const std::vector<std::string> lhs_lines = SplitEscapedString(lhs_value);
+ const std::vector<std::string> rhs_lines = SplitEscapedString(rhs_value);
+ if (lhs_lines.size() > 1 || rhs_lines.size() > 1) {
+ msg << "\nWith diff:\n"
+ << edit_distance::CreateUnifiedDiff(lhs_lines, rhs_lines);
+ }
+ }
+
+ return AssertionFailure() << msg;
+}
+
+// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
+std::string GetBoolAssertionFailureMessage(
+ const AssertionResult& assertion_result, const char* expression_text,
+ const char* actual_predicate_value, const char* expected_predicate_value) {
+ const char* actual_message = assertion_result.message();
+ Message msg;
+ msg << "Value of: " << expression_text
+ << "\n Actual: " << actual_predicate_value;
+ if (actual_message[0] != '\0') msg << " (" << actual_message << ")";
+ msg << "\nExpected: " << expected_predicate_value;
+ return msg.GetString();
+}
+
+// Helper function for implementing ASSERT_NEAR.
+AssertionResult DoubleNearPredFormat(const char* expr1, const char* expr2,
+ const char* abs_error_expr, double val1,
+ double val2, double abs_error) {
+ const double diff = fabs(val1 - val2);
+ if (diff <= abs_error) return AssertionSuccess();
+
+ // Find the value which is closest to zero.
+ const double min_abs = std::min(fabs(val1), fabs(val2));
+ // Find the distance to the next double from that value.
+ const double epsilon =
+ nextafter(min_abs, std::numeric_limits<double>::infinity()) - min_abs;
+ // Detect the case where abs_error is so small that EXPECT_NEAR is
+ // effectively the same as EXPECT_EQUAL, and give an informative error
+ // message so that the situation can be more easily understood without
+ // requiring exotic floating-point knowledge.
+ // Don't do an epsilon check if abs_error is zero because that implies
+ // that an equality check was actually intended.
+ if (!(std::isnan)(val1) && !(std::isnan)(val2) && abs_error > 0 &&
+ abs_error < epsilon) {
+ return AssertionFailure()
+ << "The difference between " << expr1 << " and " << expr2 << " is "
+ << diff << ", where\n"
+ << expr1 << " evaluates to " << val1 << ",\n"
+ << expr2 << " evaluates to " << val2 << ".\nThe abs_error parameter "
+ << abs_error_expr << " evaluates to " << abs_error
+ << " which is smaller than the minimum distance between doubles for "
+ "numbers of this magnitude which is "
+ << epsilon
+ << ", thus making this EXPECT_NEAR check equivalent to "
+ "EXPECT_EQUAL. Consider using EXPECT_DOUBLE_EQ instead.";
+ }
+ return AssertionFailure()
+ << "The difference between " << expr1 << " and " << expr2 << " is "
+ << diff << ", which exceeds " << abs_error_expr << ", where\n"
+ << expr1 << " evaluates to " << val1 << ",\n"
+ << expr2 << " evaluates to " << val2 << ", and\n"
+ << abs_error_expr << " evaluates to " << abs_error << ".";
+}
+
+// Helper template for implementing FloatLE() and DoubleLE().
+template <typename RawType>
+AssertionResult FloatingPointLE(const char* expr1, const char* expr2,
+ RawType val1, RawType val2) {
+ // Returns success if val1 is less than val2,
+ if (val1 < val2) {
+ return AssertionSuccess();
+ }
+
+ // or if val1 is almost equal to val2.
+ const FloatingPoint<RawType> lhs(val1), rhs(val2);
+ if (lhs.AlmostEquals(rhs)) {
+ return AssertionSuccess();
+ }
+
+ // Note that the above two checks will both fail if either val1 or
+ // val2 is NaN, as the IEEE floating-point standard requires that
+ // any predicate involving a NaN must return false.
+
+ ::std::stringstream val1_ss;
+ val1_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+ << val1;
+
+ ::std::stringstream val2_ss;
+ val2_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+ << val2;
+
+ return AssertionFailure()
+ << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n"
+ << " Actual: " << StringStreamToString(&val1_ss) << " vs "
+ << StringStreamToString(&val2_ss);
+}
+
+} // namespace internal
+
+// Asserts that val1 is less than, or almost equal to, val2. Fails
+// otherwise. In particular, it fails if either val1 or val2 is NaN.
+AssertionResult FloatLE(const char* expr1, const char* expr2, float val1,
+ float val2) {
+ return internal::FloatingPointLE<float>(expr1, expr2, val1, val2);
+}
+
+// Asserts that val1 is less than, or almost equal to, val2. Fails
+// otherwise. In particular, it fails if either val1 or val2 is NaN.
+AssertionResult DoubleLE(const char* expr1, const char* expr2, double val1,
+ double val2) {
+ return internal::FloatingPointLE<double>(expr1, expr2, val1, val2);
+}
+
+namespace internal {
+
+// The helper function for {ASSERT|EXPECT}_STREQ.
+AssertionResult CmpHelperSTREQ(const char* lhs_expression,
+ const char* rhs_expression, const char* lhs,
+ const char* rhs) {
+ if (String::CStringEquals(lhs, rhs)) {
+ return AssertionSuccess();
+ }
+
+ return EqFailure(lhs_expression, rhs_expression, PrintToString(lhs),
+ PrintToString(rhs), false);
+}
+
+// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
+AssertionResult CmpHelperSTRCASEEQ(const char* lhs_expression,
+ const char* rhs_expression, const char* lhs,
+ const char* rhs) {
+ if (String::CaseInsensitiveCStringEquals(lhs, rhs)) {
+ return AssertionSuccess();
+ }
+
+ return EqFailure(lhs_expression, rhs_expression, PrintToString(lhs),
+ PrintToString(rhs), true);
+}
+
+// The helper function for {ASSERT|EXPECT}_STRNE.
+AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression, const char* s1,
+ const char* s2) {
+ if (!String::CStringEquals(s1, s2)) {
+ return AssertionSuccess();
+ } else {
+ return AssertionFailure()
+ << "Expected: (" << s1_expression << ") != (" << s2_expression
+ << "), actual: \"" << s1 << "\" vs \"" << s2 << "\"";
+ }
+}
+
+// The helper function for {ASSERT|EXPECT}_STRCASENE.
+AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
+ const char* s2_expression, const char* s1,
+ const char* s2) {
+ if (!String::CaseInsensitiveCStringEquals(s1, s2)) {
+ return AssertionSuccess();
+ } else {
+ return AssertionFailure()
+ << "Expected: (" << s1_expression << ") != (" << s2_expression
+ << ") (ignoring case), actual: \"" << s1 << "\" vs \"" << s2 << "\"";
+ }
+}
+
+} // namespace internal
+
+namespace {
+
+// Helper functions for implementing IsSubString() and IsNotSubstring().
+
+// This group of overloaded functions return true if and only if needle
+// is a substring of haystack. NULL is considered a substring of
+// itself only.
+
+bool IsSubstringPred(const char* needle, const char* haystack) {
+ if (needle == nullptr || haystack == nullptr) return needle == haystack;
+
+ return strstr(haystack, needle) != nullptr;
+}
+
+bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
+ if (needle == nullptr || haystack == nullptr) return needle == haystack;
+
+ return wcsstr(haystack, needle) != nullptr;
+}
+
+// StringType here can be either ::std::string or ::std::wstring.
+template <typename StringType>
+bool IsSubstringPred(const StringType& needle, const StringType& haystack) {
+ return haystack.find(needle) != StringType::npos;
+}
+
+// This function implements either IsSubstring() or IsNotSubstring(),
+// depending on the value of the expected_to_be_substring parameter.
+// StringType here can be const char*, const wchar_t*, ::std::string,
+// or ::std::wstring.
+template <typename StringType>
+AssertionResult IsSubstringImpl(bool expected_to_be_substring,
+ const char* needle_expr,
+ const char* haystack_expr,
+ const StringType& needle,
+ const StringType& haystack) {
+ if (IsSubstringPred(needle, haystack) == expected_to_be_substring)
+ return AssertionSuccess();
+
+ const bool is_wide_string = sizeof(needle[0]) > 1;
+ const char* const begin_string_quote = is_wide_string ? "L\"" : "\"";
+ return AssertionFailure()
+ << "Value of: " << needle_expr << "\n"
+ << " Actual: " << begin_string_quote << needle << "\"\n"
+ << "Expected: " << (expected_to_be_substring ? "" : "not ")
+ << "a substring of " << haystack_expr << "\n"
+ << "Which is: " << begin_string_quote << haystack << "\"";
+}
+
+} // namespace
+
+// IsSubstring() and IsNotSubstring() check whether needle is a
+// substring of haystack (NULL is considered a substring of itself
+// only), and return an appropriate error message when they fail.
+
+AssertionResult IsSubstring(const char* needle_expr, const char* haystack_expr,
+ const char* needle, const char* haystack) {
+ return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsSubstring(const char* needle_expr, const char* haystack_expr,
+ const wchar_t* needle, const wchar_t* haystack) {
+ return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr, const char* needle,
+ const char* haystack) {
+ return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr, const wchar_t* needle,
+ const wchar_t* haystack) {
+ return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsSubstring(const char* needle_expr, const char* haystack_expr,
+ const ::std::string& needle,
+ const ::std::string& haystack) {
+ return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::string& needle,
+ const ::std::string& haystack) {
+ return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+
+#if GTEST_HAS_STD_WSTRING
+AssertionResult IsSubstring(const char* needle_expr, const char* haystack_expr,
+ const ::std::wstring& needle,
+ const ::std::wstring& haystack) {
+ return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::wstring& needle,
+ const ::std::wstring& haystack) {
+ return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+#endif // GTEST_HAS_STD_WSTRING
+
+namespace internal {
+
+#if GTEST_OS_WINDOWS
+
+namespace {
+
+// Helper function for IsHRESULT{SuccessFailure} predicates
+AssertionResult HRESULTFailureHelper(const char* expr, const char* expected,
+ long hr) { // NOLINT
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_TV_TITLE
+
+ // Windows CE doesn't support FormatMessage.
+ const char error_text[] = "";
+
+#else
+
+ // Looks up the human-readable system message for the HRESULT code
+ // and since we're not passing any params to FormatMessage, we don't
+ // want inserts expanded.
+ const DWORD kFlags =
+ FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS;
+ const DWORD kBufSize = 4096;
+ // Gets the system's human readable message string for this HRESULT.
+ char error_text[kBufSize] = {'\0'};
+ DWORD message_length = ::FormatMessageA(kFlags,
+ 0, // no source, we're asking system
+ static_cast<DWORD>(hr), // the error
+ 0, // no line width restrictions
+ error_text, // output buffer
+ kBufSize, // buf size
+ nullptr); // no arguments for inserts
+ // Trims tailing white space (FormatMessage leaves a trailing CR-LF)
+ for (; message_length && IsSpace(error_text[message_length - 1]);
+ --message_length) {
+ error_text[message_length - 1] = '\0';
+ }
+
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+ const std::string error_hex("0x" + String::FormatHexInt(hr));
+ return ::testing::AssertionFailure()
+ << "Expected: " << expr << " " << expected << ".\n"
+ << " Actual: " << error_hex << " " << error_text << "\n";
+}
+
+} // namespace
+
+AssertionResult IsHRESULTSuccess(const char* expr, long hr) { // NOLINT
+ if (SUCCEEDED(hr)) {
+ return AssertionSuccess();
+ }
+ return HRESULTFailureHelper(expr, "succeeds", hr);
+}
+
+AssertionResult IsHRESULTFailure(const char* expr, long hr) { // NOLINT
+ if (FAILED(hr)) {
+ return AssertionSuccess();
+ }
+ return HRESULTFailureHelper(expr, "fails", hr);
+}
+
+#endif // GTEST_OS_WINDOWS
+
+// Utility functions for encoding Unicode text (wide strings) in
+// UTF-8.
+
+// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
+// like this:
+//
+// Code-point length Encoding
+// 0 - 7 bits 0xxxxxxx
+// 8 - 11 bits 110xxxxx 10xxxxxx
+// 12 - 16 bits 1110xxxx 10xxxxxx 10xxxxxx
+// 17 - 21 bits 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+// The maximum code-point a one-byte UTF-8 sequence can represent.
+constexpr uint32_t kMaxCodePoint1 = (static_cast<uint32_t>(1) << 7) - 1;
+
+// The maximum code-point a two-byte UTF-8 sequence can represent.
+constexpr uint32_t kMaxCodePoint2 = (static_cast<uint32_t>(1) << (5 + 6)) - 1;
+
+// The maximum code-point a three-byte UTF-8 sequence can represent.
+constexpr uint32_t kMaxCodePoint3 =
+ (static_cast<uint32_t>(1) << (4 + 2 * 6)) - 1;
+
+// The maximum code-point a four-byte UTF-8 sequence can represent.
+constexpr uint32_t kMaxCodePoint4 =
+ (static_cast<uint32_t>(1) << (3 + 3 * 6)) - 1;
+
+// Chops off the n lowest bits from a bit pattern. Returns the n
+// lowest bits. As a side effect, the original bit pattern will be
+// shifted to the right by n bits.
+inline uint32_t ChopLowBits(uint32_t* bits, int n) {
+ const uint32_t low_bits = *bits & ((static_cast<uint32_t>(1) << n) - 1);
+ *bits >>= n;
+ return low_bits;
+}
+
+// Converts a Unicode code point to a narrow string in UTF-8 encoding.
+// code_point parameter is of type uint32_t because wchar_t may not be
+// wide enough to contain a code point.
+// If the code_point is not a valid Unicode code point
+// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
+// to "(Invalid Unicode 0xXXXXXXXX)".
+std::string CodePointToUtf8(uint32_t code_point) {
+ if (code_point > kMaxCodePoint4) {
+ return "(Invalid Unicode 0x" + String::FormatHexUInt32(code_point) + ")";
+ }
+
+ char str[5]; // Big enough for the largest valid code point.
+ if (code_point <= kMaxCodePoint1) {
+ str[1] = '\0';
+ str[0] = static_cast<char>(code_point); // 0xxxxxxx
+ } else if (code_point <= kMaxCodePoint2) {
+ str[2] = '\0';
+ str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[0] = static_cast<char>(0xC0 | code_point); // 110xxxxx
+ } else if (code_point <= kMaxCodePoint3) {
+ str[3] = '\0';
+ str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[0] = static_cast<char>(0xE0 | code_point); // 1110xxxx
+ } else { // code_point <= kMaxCodePoint4
+ str[4] = '\0';
+ str[3] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[0] = static_cast<char>(0xF0 | code_point); // 11110xxx
+ }
+ return str;
+}
+
+// The following two functions only make sense if the system
+// uses UTF-16 for wide string encoding. All supported systems
+// with 16 bit wchar_t (Windows, Cygwin) do use UTF-16.
+
+// Determines if the arguments constitute UTF-16 surrogate pair
+// and thus should be combined into a single Unicode code point
+// using CreateCodePointFromUtf16SurrogatePair.
+inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) {
+ return sizeof(wchar_t) == 2 && (first & 0xFC00) == 0xD800 &&
+ (second & 0xFC00) == 0xDC00;
+}
+
+// Creates a Unicode code point from UTF16 surrogate pair.
+inline uint32_t CreateCodePointFromUtf16SurrogatePair(wchar_t first,
+ wchar_t second) {
+ const auto first_u = static_cast<uint32_t>(first);
+ const auto second_u = static_cast<uint32_t>(second);
+ const uint32_t mask = (1 << 10) - 1;
+ return (sizeof(wchar_t) == 2)
+ ? (((first_u & mask) << 10) | (second_u & mask)) + 0x10000
+ :
+ // This function should not be called when the condition is
+ // false, but we provide a sensible default in case it is.
+ first_u;
+}
+
+// Converts a wide string to a narrow string in UTF-8 encoding.
+// The wide string is assumed to have the following encoding:
+// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin)
+// UTF-32 if sizeof(wchar_t) == 4 (on Linux)
+// Parameter str points to a null-terminated wide string.
+// Parameter num_chars may additionally limit the number
+// of wchar_t characters processed. -1 is used when the entire string
+// should be processed.
+// If the string contains code points that are not valid Unicode code points
+// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
+// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
+// and contains invalid UTF-16 surrogate pairs, values in those pairs
+// will be encoded as individual Unicode characters from Basic Normal Plane.
+std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
+ if (num_chars == -1) num_chars = static_cast<int>(wcslen(str));
+
+ ::std::stringstream stream;
+ for (int i = 0; i < num_chars; ++i) {
+ uint32_t unicode_code_point;
+
+ if (str[i] == L'\0') {
+ break;
+ } else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) {
+ unicode_code_point =
+ CreateCodePointFromUtf16SurrogatePair(str[i], str[i + 1]);
+ i++;
+ } else {
+ unicode_code_point = static_cast<uint32_t>(str[i]);
+ }
+
+ stream << CodePointToUtf8(unicode_code_point);
+ }
+ return StringStreamToString(&stream);
+}
+
+// Converts a wide C string to an std::string using the UTF-8 encoding.
+// NULL will be converted to "(null)".
+std::string String::ShowWideCString(const wchar_t* wide_c_str) {
+ if (wide_c_str == nullptr) return "(null)";
+
+ return internal::WideStringToUtf8(wide_c_str, -1);
+}
+
+// Compares two wide C strings. Returns true if and only if they have the
+// same content.
+//
+// Unlike wcscmp(), this function can handle NULL argument(s). A NULL
+// C string is considered different to any non-NULL C string,
+// including the empty string.
+bool String::WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs) {
+ if (lhs == nullptr) return rhs == nullptr;
+
+ if (rhs == nullptr) return false;
+
+ return wcscmp(lhs, rhs) == 0;
+}
+
+// Helper function for *_STREQ on wide strings.
+AssertionResult CmpHelperSTREQ(const char* lhs_expression,
+ const char* rhs_expression, const wchar_t* lhs,
+ const wchar_t* rhs) {
+ if (String::WideCStringEquals(lhs, rhs)) {
+ return AssertionSuccess();
+ }
+
+ return EqFailure(lhs_expression, rhs_expression, PrintToString(lhs),
+ PrintToString(rhs), false);
+}
+
+// Helper function for *_STRNE on wide strings.
+AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression, const wchar_t* s1,
+ const wchar_t* s2) {
+ if (!String::WideCStringEquals(s1, s2)) {
+ return AssertionSuccess();
+ }
+
+ return AssertionFailure()
+ << "Expected: (" << s1_expression << ") != (" << s2_expression
+ << "), actual: " << PrintToString(s1) << " vs " << PrintToString(s2);
+}
+
+// Compares two C strings, ignoring case. Returns true if and only if they have
+// the same content.
+//
+// Unlike strcasecmp(), this function can handle NULL argument(s). A
+// NULL C string is considered different to any non-NULL C string,
+// including the empty string.
+bool String::CaseInsensitiveCStringEquals(const char* lhs, const char* rhs) {
+ if (lhs == nullptr) return rhs == nullptr;
+ if (rhs == nullptr) return false;
+ return posix::StrCaseCmp(lhs, rhs) == 0;
+}
+
+// Compares two wide C strings, ignoring case. Returns true if and only if they
+// have the same content.
+//
+// Unlike wcscasecmp(), this function can handle NULL argument(s).
+// A NULL C string is considered different to any non-NULL wide C string,
+// including the empty string.
+// NB: The implementations on different platforms slightly differ.
+// On windows, this method uses _wcsicmp which compares according to LC_CTYPE
+// environment variable. On GNU platform this method uses wcscasecmp
+// which compares according to LC_CTYPE category of the current locale.
+// On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
+// current locale.
+bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
+ const wchar_t* rhs) {
+ if (lhs == nullptr) return rhs == nullptr;
+
+ if (rhs == nullptr) return false;
+
+#if GTEST_OS_WINDOWS
+ return _wcsicmp(lhs, rhs) == 0;
+#elif GTEST_OS_LINUX && !GTEST_OS_LINUX_ANDROID
+ return wcscasecmp(lhs, rhs) == 0;
+#else
+ // Android, Mac OS X and Cygwin don't define wcscasecmp.
+ // Other unknown OSes may not define it either.
+ wint_t left, right;
+ do {
+ left = towlower(static_cast<wint_t>(*lhs++));
+ right = towlower(static_cast<wint_t>(*rhs++));
+ } while (left && left == right);
+ return left == right;
+#endif // OS selector
+}
+
+// Returns true if and only if str ends with the given suffix, ignoring case.
+// Any string is considered to end with an empty suffix.
+bool String::EndsWithCaseInsensitive(const std::string& str,
+ const std::string& suffix) {
+ const size_t str_len = str.length();
+ const size_t suffix_len = suffix.length();
+ return (str_len >= suffix_len) &&
+ CaseInsensitiveCStringEquals(str.c_str() + str_len - suffix_len,
+ suffix.c_str());
+}
+
+// Formats an int value as "%02d".
+std::string String::FormatIntWidth2(int value) {
+ return FormatIntWidthN(value, 2);
+}
+
+// Formats an int value to given width with leading zeros.
+std::string String::FormatIntWidthN(int value, int width) {
+ std::stringstream ss;
+ ss << std::setfill('0') << std::setw(width) << value;
+ return ss.str();
+}
+
+// Formats an int value as "%X".
+std::string String::FormatHexUInt32(uint32_t value) {
+ std::stringstream ss;
+ ss << std::hex << std::uppercase << value;
+ return ss.str();
+}
+
+// Formats an int value as "%X".
+std::string String::FormatHexInt(int value) {
+ return FormatHexUInt32(static_cast<uint32_t>(value));
+}
+
+// Formats a byte as "%02X".
+std::string String::FormatByte(unsigned char value) {
+ std::stringstream ss;
+ ss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase
+ << static_cast<unsigned int>(value);
+ return ss.str();
+}
+
+// Converts the buffer in a stringstream to an std::string, converting NUL
+// bytes to "\\0" along the way.
+std::string StringStreamToString(::std::stringstream* ss) {
+ const ::std::string& str = ss->str();
+ const char* const start = str.c_str();
+ const char* const end = start + str.length();
+
+ std::string result;
+ result.reserve(static_cast<size_t>(2 * (end - start)));
+ for (const char* ch = start; ch != end; ++ch) {
+ if (*ch == '\0') {
+ result += "\\0"; // Replaces NUL with "\\0";
+ } else {
+ result += *ch;
+ }
+ }
+
+ return result;
+}
+
+// Appends the user-supplied message to the Google-Test-generated message.
+std::string AppendUserMessage(const std::string& gtest_msg,
+ const Message& user_msg) {
+ // Appends the user message if it's non-empty.
+ const std::string user_msg_string = user_msg.GetString();
+ if (user_msg_string.empty()) {
+ return gtest_msg;
+ }
+ if (gtest_msg.empty()) {
+ return user_msg_string;
+ }
+ return gtest_msg + "\n" + user_msg_string;
+}
+
+} // namespace internal
+
+// class TestResult
+
+// Creates an empty TestResult.
+TestResult::TestResult()
+ : death_test_count_(0), start_timestamp_(0), elapsed_time_(0) {}
+
+// D'tor.
+TestResult::~TestResult() {}
+
+// Returns the i-th test part result among all the results. i can
+// range from 0 to total_part_count() - 1. If i is not in that range,
+// aborts the program.
+const TestPartResult& TestResult::GetTestPartResult(int i) const {
+ if (i < 0 || i >= total_part_count()) internal::posix::Abort();
+ return test_part_results_.at(static_cast<size_t>(i));
+}
+
+// Returns the i-th test property. i can range from 0 to
+// test_property_count() - 1. If i is not in that range, aborts the
+// program.
+const TestProperty& TestResult::GetTestProperty(int i) const {
+ if (i < 0 || i >= test_property_count()) internal::posix::Abort();
+ return test_properties_.at(static_cast<size_t>(i));
+}
+
+// Clears the test part results.
+void TestResult::ClearTestPartResults() { test_part_results_.clear(); }
+
+// Adds a test part result to the list.
+void TestResult::AddTestPartResult(const TestPartResult& test_part_result) {
+ test_part_results_.push_back(test_part_result);
+}
+
+// Adds a test property to the list. If a property with the same key as the
+// supplied property is already represented, the value of this test_property
+// replaces the old value for that key.
+void TestResult::RecordProperty(const std::string& xml_element,
+ const TestProperty& test_property) {
+ if (!ValidateTestProperty(xml_element, test_property)) {
+ return;
+ }
+ internal::MutexLock lock(&test_properties_mutex_);
+ const std::vector<TestProperty>::iterator property_with_matching_key =
+ std::find_if(test_properties_.begin(), test_properties_.end(),
+ internal::TestPropertyKeyIs(test_property.key()));
+ if (property_with_matching_key == test_properties_.end()) {
+ test_properties_.push_back(test_property);
+ return;
+ }
+ property_with_matching_key->SetValue(test_property.value());
+}
+
+// The list of reserved attributes used in the <testsuites> element of XML
+// output.
+static const char* const kReservedTestSuitesAttributes[] = {
+ "disabled", "errors", "failures", "name",
+ "random_seed", "tests", "time", "timestamp"};
+
+// The list of reserved attributes used in the <testsuite> element of XML
+// output.
+static const char* const kReservedTestSuiteAttributes[] = {
+ "disabled", "errors", "failures", "name",
+ "tests", "time", "timestamp", "skipped"};
+
+// The list of reserved attributes used in the <testcase> element of XML output.
+static const char* const kReservedTestCaseAttributes[] = {
+ "classname", "name", "status", "time",
+ "type_param", "value_param", "file", "line"};
+
+// Use a slightly different set for allowed output to ensure existing tests can
+// still RecordProperty("result") or "RecordProperty(timestamp")
+static const char* const kReservedOutputTestCaseAttributes[] = {
+ "classname", "name", "status", "time", "type_param",
+ "value_param", "file", "line", "result", "timestamp"};
+
+template <size_t kSize>
+std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
+ return std::vector<std::string>(array, array + kSize);
+}
+
+static std::vector<std::string> GetReservedAttributesForElement(
+ const std::string& xml_element) {
+ if (xml_element == "testsuites") {
+ return ArrayAsVector(kReservedTestSuitesAttributes);
+ } else if (xml_element == "testsuite") {
+ return ArrayAsVector(kReservedTestSuiteAttributes);
+ } else if (xml_element == "testcase") {
+ return ArrayAsVector(kReservedTestCaseAttributes);
+ } else {
+ GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element;
+ }
+ // This code is unreachable but some compilers may not realizes that.
+ return std::vector<std::string>();
+}
+
+// TODO(jdesprez): Merge the two getReserved attributes once skip is improved
+static std::vector<std::string> GetReservedOutputAttributesForElement(
+ const std::string& xml_element) {
+ if (xml_element == "testsuites") {
+ return ArrayAsVector(kReservedTestSuitesAttributes);
+ } else if (xml_element == "testsuite") {
+ return ArrayAsVector(kReservedTestSuiteAttributes);
+ } else if (xml_element == "testcase") {
+ return ArrayAsVector(kReservedOutputTestCaseAttributes);
+ } else {
+ GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element;
+ }
+ // This code is unreachable but some compilers may not realizes that.
+ return std::vector<std::string>();
+}
+
+static std::string FormatWordList(const std::vector<std::string>& words) {
+ Message word_list;
+ for (size_t i = 0; i < words.size(); ++i) {
+ if (i > 0 && words.size() > 2) {
+ word_list << ", ";
+ }
+ if (i == words.size() - 1) {
+ word_list << "and ";
+ }
+ word_list << "'" << words[i] << "'";
+ }
+ return word_list.GetString();
+}
+
+static bool ValidateTestPropertyName(
+ const std::string& property_name,
+ const std::vector<std::string>& reserved_names) {
+ if (std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
+ reserved_names.end()) {
+ ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
+ << " (" << FormatWordList(reserved_names)
+ << " are reserved by " << GTEST_NAME_ << ")";
+ return false;
+ }
+ return true;
+}
+
+// Adds a failure if the key is a reserved attribute of the element named
+// xml_element. Returns true if the property is valid.
+bool TestResult::ValidateTestProperty(const std::string& xml_element,
+ const TestProperty& test_property) {
+ return ValidateTestPropertyName(test_property.key(),
+ GetReservedAttributesForElement(xml_element));
+}
+
+// Clears the object.
+void TestResult::Clear() {
+ test_part_results_.clear();
+ test_properties_.clear();
+ death_test_count_ = 0;
+ elapsed_time_ = 0;
+}
+
+// Returns true off the test part was skipped.
+static bool TestPartSkipped(const TestPartResult& result) {
+ return result.skipped();
+}
+
+// Returns true if and only if the test was skipped.
+bool TestResult::Skipped() const {
+ return !Failed() && CountIf(test_part_results_, TestPartSkipped) > 0;
+}
+
+// Returns true if and only if the test failed.
+bool TestResult::Failed() const {
+ for (int i = 0; i < total_part_count(); ++i) {
+ if (GetTestPartResult(i).failed()) return true;
+ }
+ return false;
+}
+
+// Returns true if and only if the test part fatally failed.
+static bool TestPartFatallyFailed(const TestPartResult& result) {
+ return result.fatally_failed();
+}
+
+// Returns true if and only if the test fatally failed.
+bool TestResult::HasFatalFailure() const {
+ return CountIf(test_part_results_, TestPartFatallyFailed) > 0;
+}
+
+// Returns true if and only if the test part non-fatally failed.
+static bool TestPartNonfatallyFailed(const TestPartResult& result) {
+ return result.nonfatally_failed();
+}
+
+// Returns true if and only if the test has a non-fatal failure.
+bool TestResult::HasNonfatalFailure() const {
+ return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0;
+}
+
+// Gets the number of all test parts. This is the sum of the number
+// of successful test parts and the number of failed test parts.
+int TestResult::total_part_count() const {
+ return static_cast<int>(test_part_results_.size());
+}
+
+// Returns the number of the test properties.
+int TestResult::test_property_count() const {
+ return static_cast<int>(test_properties_.size());
+}
+
+// class Test
+
+// Creates a Test object.
+
+// The c'tor saves the states of all flags.
+Test::Test() : gtest_flag_saver_(new GTEST_FLAG_SAVER_) {}
+
+// The d'tor restores the states of all flags. The actual work is
+// done by the d'tor of the gtest_flag_saver_ field, and thus not
+// visible here.
+Test::~Test() {}
+
+// Sets up the test fixture.
+//
+// A sub-class may override this.
+void Test::SetUp() {}
+
+// Tears down the test fixture.
+//
+// A sub-class may override this.
+void Test::TearDown() {}
+
+// Allows user supplied key value pairs to be recorded for later output.
+void Test::RecordProperty(const std::string& key, const std::string& value) {
+ UnitTest::GetInstance()->RecordProperty(key, value);
+}
+
+// Allows user supplied key value pairs to be recorded for later output.
+void Test::RecordProperty(const std::string& key, int value) {
+ Message value_message;
+ value_message << value;
+ RecordProperty(key, value_message.GetString().c_str());
+}
+
+namespace internal {
+
+void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
+ const std::string& message) {
+ // This function is a friend of UnitTest and as such has access to
+ // AddTestPartResult.
+ UnitTest::GetInstance()->AddTestPartResult(
+ result_type,
+ nullptr, // No info about the source file where the exception occurred.
+ -1, // We have no info on which line caused the exception.
+ message,
+ ""); // No stack trace, either.
+}
+
+} // namespace internal
+
+// Google Test requires all tests in the same test suite to use the same test
+// fixture class. This function checks if the current test has the
+// same fixture class as the first test in the current test suite. If
+// yes, it returns true; otherwise it generates a Google Test failure and
+// returns false.
+bool Test::HasSameFixtureClass() {
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ const TestSuite* const test_suite = impl->current_test_suite();
+
+ // Info about the first test in the current test suite.
+ const TestInfo* const first_test_info = test_suite->test_info_list()[0];
+ const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_;
+ const char* const first_test_name = first_test_info->name();
+
+ // Info about the current test.
+ const TestInfo* const this_test_info = impl->current_test_info();
+ const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_;
+ const char* const this_test_name = this_test_info->name();
+
+ if (this_fixture_id != first_fixture_id) {
+ // Is the first test defined using TEST?
+ const bool first_is_TEST = first_fixture_id == internal::GetTestTypeId();
+ // Is this test defined using TEST?
+ const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId();
+
+ if (first_is_TEST || this_is_TEST) {
+ // Both TEST and TEST_F appear in same test suite, which is incorrect.
+ // Tell the user how to fix this.
+
+ // Gets the name of the TEST and the name of the TEST_F. Note
+ // that first_is_TEST and this_is_TEST cannot both be true, as
+ // the fixture IDs are different for the two tests.
+ const char* const TEST_name =
+ first_is_TEST ? first_test_name : this_test_name;
+ const char* const TEST_F_name =
+ first_is_TEST ? this_test_name : first_test_name;
+
+ ADD_FAILURE()
+ << "All tests in the same test suite must use the same test fixture\n"
+ << "class, so mixing TEST_F and TEST in the same test suite is\n"
+ << "illegal. In test suite " << this_test_info->test_suite_name()
+ << ",\n"
+ << "test " << TEST_F_name << " is defined using TEST_F but\n"
+ << "test " << TEST_name << " is defined using TEST. You probably\n"
+ << "want to change the TEST to TEST_F or move it to another test\n"
+ << "case.";
+ } else {
+ // Two fixture classes with the same name appear in two different
+ // namespaces, which is not allowed. Tell the user how to fix this.
+ ADD_FAILURE()
+ << "All tests in the same test suite must use the same test fixture\n"
+ << "class. However, in test suite "
+ << this_test_info->test_suite_name() << ",\n"
+ << "you defined test " << first_test_name << " and test "
+ << this_test_name << "\n"
+ << "using two different test fixture classes. This can happen if\n"
+ << "the two classes are from different namespaces or translation\n"
+ << "units and have the same name. You should probably rename one\n"
+ << "of the classes to put the tests into different test suites.";
+ }
+ return false;
+ }
+
+ return true;
+}
+
+#if GTEST_HAS_SEH
+
+// Adds an "exception thrown" fatal failure to the current test. This
+// function returns its result via an output parameter pointer because VC++
+// prohibits creation of objects with destructors on stack in functions
+// using __try (see error C2712).
+static std::string* FormatSehExceptionMessage(DWORD exception_code,
+ const char* location) {
+ Message message;
+ message << "SEH exception with code 0x" << std::setbase(16) << exception_code
+ << std::setbase(10) << " thrown in " << location << ".";
+
+ return new std::string(message.GetString());
+}
+
+#endif // GTEST_HAS_SEH
+
+namespace internal {
+
+#if GTEST_HAS_EXCEPTIONS
+
+// Adds an "exception thrown" fatal failure to the current test.
+static std::string FormatCxxExceptionMessage(const char* description,
+ const char* location) {
+ Message message;
+ if (description != nullptr) {
+ message << "C++ exception with description \"" << description << "\"";
+ } else {
+ message << "Unknown C++ exception";
+ }
+ message << " thrown in " << location << ".";
+
+ return message.GetString();
+}
+
+static std::string PrintTestPartResultToString(
+ const TestPartResult& test_part_result);
+
+GoogleTestFailureException::GoogleTestFailureException(
+ const TestPartResult& failure)
+ : ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {}
+
+#endif // GTEST_HAS_EXCEPTIONS
+
+// We put these helper functions in the internal namespace as IBM's xlC
+// compiler rejects the code if they were declared static.
+
+// Runs the given method and handles SEH exceptions it throws, when
+// SEH is supported; returns the 0-value for type Result in case of an
+// SEH exception. (Microsoft compilers cannot handle SEH and C++
+// exceptions in the same function. Therefore, we provide a separate
+// wrapper function for handling SEH exceptions.)
+template <class T, typename Result>
+Result HandleSehExceptionsInMethodIfSupported(T* object, Result (T::*method)(),
+ const char* location) {
+#if GTEST_HAS_SEH
+ __try {
+ return (object->*method)();
+ } __except (internal::UnitTestOptions::GTestShouldProcessSEH( // NOLINT
+ GetExceptionCode())) {
+ // We create the exception message on the heap because VC++ prohibits
+ // creation of objects with destructors on stack in functions using __try
+ // (see error C2712).
+ std::string* exception_message =
+ FormatSehExceptionMessage(GetExceptionCode(), location);
+ internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure,
+ *exception_message);
+ delete exception_message;
+ return static_cast<Result>(0);
+ }
+#else
+ (void)location;
+ return (object->*method)();
+#endif // GTEST_HAS_SEH
+}
+
+// Runs the given method and catches and reports C++ and/or SEH-style
+// exceptions, if they are supported; returns the 0-value for type
+// Result in case of an SEH exception.
+template <class T, typename Result>
+Result HandleExceptionsInMethodIfSupported(T* object, Result (T::*method)(),
+ const char* location) {
+ // NOTE: The user code can affect the way in which Google Test handles
+ // exceptions by setting GTEST_FLAG(catch_exceptions), but only before
+ // RUN_ALL_TESTS() starts. It is technically possible to check the flag
+ // after the exception is caught and either report or re-throw the
+ // exception based on the flag's value:
+ //
+ // try {
+ // // Perform the test method.
+ // } catch (...) {
+ // if (GTEST_FLAG_GET(catch_exceptions))
+ // // Report the exception as failure.
+ // else
+ // throw; // Re-throws the original exception.
+ // }
+ //
+ // However, the purpose of this flag is to allow the program to drop into
+ // the debugger when the exception is thrown. On most platforms, once the
+ // control enters the catch block, the exception origin information is
+ // lost and the debugger will stop the program at the point of the
+ // re-throw in this function -- instead of at the point of the original
+ // throw statement in the code under test. For this reason, we perform
+ // the check early, sacrificing the ability to affect Google Test's
+ // exception handling in the method where the exception is thrown.
+ if (internal::GetUnitTestImpl()->catch_exceptions()) {
+#if GTEST_HAS_EXCEPTIONS
+ try {
+ return HandleSehExceptionsInMethodIfSupported(object, method, location);
+ } catch (const AssertionException&) { // NOLINT
+ // This failure was reported already.
+ } catch (const internal::GoogleTestFailureException&) { // NOLINT
+ // This exception type can only be thrown by a failed Google
+ // Test assertion with the intention of letting another testing
+ // framework catch it. Therefore we just re-throw it.
+ throw;
+ } catch (const std::exception& e) { // NOLINT
+ internal::ReportFailureInUnknownLocation(
+ TestPartResult::kFatalFailure,
+ FormatCxxExceptionMessage(e.what(), location));
+ } catch (...) { // NOLINT
+ internal::ReportFailureInUnknownLocation(
+ TestPartResult::kFatalFailure,
+ FormatCxxExceptionMessage(nullptr, location));
+ }
+ return static_cast<Result>(0);
+#else
+ return HandleSehExceptionsInMethodIfSupported(object, method, location);
+#endif // GTEST_HAS_EXCEPTIONS
+ } else {
+ return (object->*method)();
+ }
+}
+
+} // namespace internal
+
+// Runs the test and updates the test result.
+void Test::Run() {
+ if (!HasSameFixtureClass()) return;
+
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()");
+ // We will run the test only if SetUp() was successful and didn't call
+ // GTEST_SKIP().
+ if (!HasFatalFailure() && !IsSkipped()) {
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(this, &Test::TestBody,
+ "the test body");
+ }
+
+ // However, we want to clean up as much as possible. Hence we will
+ // always call TearDown(), even if SetUp() or the test body has
+ // failed.
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(this, &Test::TearDown,
+ "TearDown()");
+}
+
+// Returns true if and only if the current test has a fatal failure.
+bool Test::HasFatalFailure() {
+ return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure();
+}
+
+// Returns true if and only if the current test has a non-fatal failure.
+bool Test::HasNonfatalFailure() {
+ return internal::GetUnitTestImpl()
+ ->current_test_result()
+ ->HasNonfatalFailure();
+}
+
+// Returns true if and only if the current test was skipped.
+bool Test::IsSkipped() {
+ return internal::GetUnitTestImpl()->current_test_result()->Skipped();
+}
+
+// class TestInfo
+
+// Constructs a TestInfo object. It assumes ownership of the test factory
+// object.
+TestInfo::TestInfo(const std::string& a_test_suite_name,
+ const std::string& a_name, const char* a_type_param,
+ const char* a_value_param,
+ internal::CodeLocation a_code_location,
+ internal::TypeId fixture_class_id,
+ internal::TestFactoryBase* factory)
+ : test_suite_name_(a_test_suite_name),
+ name_(a_name),
+ type_param_(a_type_param ? new std::string(a_type_param) : nullptr),
+ value_param_(a_value_param ? new std::string(a_value_param) : nullptr),
+ location_(a_code_location),
+ fixture_class_id_(fixture_class_id),
+ should_run_(false),
+ is_disabled_(false),
+ matches_filter_(false),
+ is_in_another_shard_(false),
+ factory_(factory),
+ result_() {}
+
+// Destructs a TestInfo object.
+TestInfo::~TestInfo() { delete factory_; }
+
+namespace internal {
+
+// Creates a new TestInfo object and registers it with Google Test;
+// returns the created object.
+//
+// Arguments:
+//
+// test_suite_name: name of the test suite
+// name: name of the test
+// type_param: the name of the test's type parameter, or NULL if
+// this is not a typed or a type-parameterized test.
+// value_param: text representation of the test's value parameter,
+// or NULL if this is not a value-parameterized test.
+// code_location: code location where the test is defined
+// fixture_class_id: ID of the test fixture class
+// set_up_tc: pointer to the function that sets up the test suite
+// tear_down_tc: pointer to the function that tears down the test suite
+// factory: pointer to the factory that creates a test object.
+// The newly created TestInfo instance will assume
+// ownership of the factory object.
+TestInfo* MakeAndRegisterTestInfo(
+ const char* test_suite_name, const char* name, const char* type_param,
+ const char* value_param, CodeLocation code_location,
+ TypeId fixture_class_id, SetUpTestSuiteFunc set_up_tc,
+ TearDownTestSuiteFunc tear_down_tc, TestFactoryBase* factory) {
+ TestInfo* const test_info =
+ new TestInfo(test_suite_name, name, type_param, value_param,
+ code_location, fixture_class_id, factory);
+ GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info);
+ return test_info;
+}
+
+void ReportInvalidTestSuiteType(const char* test_suite_name,
+ CodeLocation code_location) {
+ Message errors;
+ errors
+ << "Attempted redefinition of test suite " << test_suite_name << ".\n"
+ << "All tests in the same test suite must use the same test fixture\n"
+ << "class. However, in test suite " << test_suite_name << ", you tried\n"
+ << "to define a test using a fixture class different from the one\n"
+ << "used earlier. This can happen if the two fixture classes are\n"
+ << "from different namespaces and have the same name. You should\n"
+ << "probably rename one of the classes to put the tests into different\n"
+ << "test suites.";
+
+ GTEST_LOG_(ERROR) << FormatFileLocation(code_location.file.c_str(),
+ code_location.line)
+ << " " << errors.GetString();
+}
+} // namespace internal
+
+namespace {
+
+// A predicate that checks the test name of a TestInfo against a known
+// value.
+//
+// This is used for implementation of the TestSuite class only. We put
+// it in the anonymous namespace to prevent polluting the outer
+// namespace.
+//
+// TestNameIs is copyable.
+class TestNameIs {
+ public:
+ // Constructor.
+ //
+ // TestNameIs has NO default constructor.
+ explicit TestNameIs(const char* name) : name_(name) {}
+
+ // Returns true if and only if the test name of test_info matches name_.
+ bool operator()(const TestInfo* test_info) const {
+ return test_info && test_info->name() == name_;
+ }
+
+ private:
+ std::string name_;
+};
+
+} // namespace
+
+namespace internal {
+
+// This method expands all parameterized tests registered with macros TEST_P
+// and INSTANTIATE_TEST_SUITE_P into regular tests and registers those.
+// This will be done just once during the program runtime.
+void UnitTestImpl::RegisterParameterizedTests() {
+ if (!parameterized_tests_registered_) {
+ parameterized_test_registry_.RegisterTests();
+ type_parameterized_test_registry_.CheckForInstantiations();
+ parameterized_tests_registered_ = true;
+ }
+}
+
+} // namespace internal
+
+// Creates the test object, runs it, records its result, and then
+// deletes it.
+void TestInfo::Run() {
+ TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+ if (!should_run_) {
+ if (is_disabled_ && matches_filter_) repeater->OnTestDisabled(*this);
+ return;
+ }
+
+ // Tells UnitTest where to store test result.
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ impl->set_current_test_info(this);
+
+ // Notifies the unit test event listeners that a test is about to start.
+ repeater->OnTestStart(*this);
+ result_.set_start_timestamp(internal::GetTimeInMillis());
+ internal::Timer timer;
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+
+ // Creates the test object.
+ Test* const test = internal::HandleExceptionsInMethodIfSupported(
+ factory_, &internal::TestFactoryBase::CreateTest,
+ "the test fixture's constructor");
+
+ // Runs the test if the constructor didn't generate a fatal failure or invoke
+ // GTEST_SKIP().
+ // Note that the object will not be null
+ if (!Test::HasFatalFailure() && !Test::IsSkipped()) {
+ // This doesn't throw as all user code that can throw are wrapped into
+ // exception handling code.
+ test->Run();
+ }
+
+ if (test != nullptr) {
+ // Deletes the test object.
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(
+ test, &Test::DeleteSelf_, "the test fixture's destructor");
+ }
+
+ result_.set_elapsed_time(timer.Elapsed());
+
+ // Notifies the unit test event listener that a test has just finished.
+ repeater->OnTestEnd(*this);
+
+ // Tells UnitTest to stop associating assertion results to this
+ // test.
+ impl->set_current_test_info(nullptr);
+}
+
+// Skip and records a skipped test result for this object.
+void TestInfo::Skip() {
+ if (!should_run_) return;
+
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ impl->set_current_test_info(this);
+
+ TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+ // Notifies the unit test event listeners that a test is about to start.
+ repeater->OnTestStart(*this);
+
+ const TestPartResult test_part_result =
+ TestPartResult(TestPartResult::kSkip, this->file(), this->line(), "");
+ impl->GetTestPartResultReporterForCurrentThread()->ReportTestPartResult(
+ test_part_result);
+
+ // Notifies the unit test event listener that a test has just finished.
+ repeater->OnTestEnd(*this);
+ impl->set_current_test_info(nullptr);
+}
+
+// class TestSuite
+
+// Gets the number of successful tests in this test suite.
+int TestSuite::successful_test_count() const {
+ return CountIf(test_info_list_, TestPassed);
+}
+
+// Gets the number of successful tests in this test suite.
+int TestSuite::skipped_test_count() const {
+ return CountIf(test_info_list_, TestSkipped);
+}
+
+// Gets the number of failed tests in this test suite.
+int TestSuite::failed_test_count() const {
+ return CountIf(test_info_list_, TestFailed);
+}
+
+// Gets the number of disabled tests that will be reported in the XML report.
+int TestSuite::reportable_disabled_test_count() const {
+ return CountIf(test_info_list_, TestReportableDisabled);
+}
+
+// Gets the number of disabled tests in this test suite.
+int TestSuite::disabled_test_count() const {
+ return CountIf(test_info_list_, TestDisabled);
+}
+
+// Gets the number of tests to be printed in the XML report.
+int TestSuite::reportable_test_count() const {
+ return CountIf(test_info_list_, TestReportable);
+}
+
+// Get the number of tests in this test suite that should run.
+int TestSuite::test_to_run_count() const {
+ return CountIf(test_info_list_, ShouldRunTest);
+}
+
+// Gets the number of all tests.
+int TestSuite::total_test_count() const {
+ return static_cast<int>(test_info_list_.size());
+}
+
+// Creates a TestSuite with the given name.
+//
+// Arguments:
+//
+// a_name: name of the test suite
+// a_type_param: the name of the test suite's type parameter, or NULL if
+// this is not a typed or a type-parameterized test suite.
+// set_up_tc: pointer to the function that sets up the test suite
+// tear_down_tc: pointer to the function that tears down the test suite
+TestSuite::TestSuite(const char* a_name, const char* a_type_param,
+ internal::SetUpTestSuiteFunc set_up_tc,
+ internal::TearDownTestSuiteFunc tear_down_tc)
+ : name_(a_name),
+ type_param_(a_type_param ? new std::string(a_type_param) : nullptr),
+ set_up_tc_(set_up_tc),
+ tear_down_tc_(tear_down_tc),
+ should_run_(false),
+ start_timestamp_(0),
+ elapsed_time_(0) {}
+
+// Destructor of TestSuite.
+TestSuite::~TestSuite() {
+ // Deletes every Test in the collection.
+ ForEach(test_info_list_, internal::Delete<TestInfo>);
+}
+
+// Returns the i-th test among all the tests. i can range from 0 to
+// total_test_count() - 1. If i is not in that range, returns NULL.
+const TestInfo* TestSuite::GetTestInfo(int i) const {
+ const int index = GetElementOr(test_indices_, i, -1);
+ return index < 0 ? nullptr : test_info_list_[static_cast<size_t>(index)];
+}
+
+// Returns the i-th test among all the tests. i can range from 0 to
+// total_test_count() - 1. If i is not in that range, returns NULL.
+TestInfo* TestSuite::GetMutableTestInfo(int i) {
+ const int index = GetElementOr(test_indices_, i, -1);
+ return index < 0 ? nullptr : test_info_list_[static_cast<size_t>(index)];
+}
+
+// Adds a test to this test suite. Will delete the test upon
+// destruction of the TestSuite object.
+void TestSuite::AddTestInfo(TestInfo* test_info) {
+ test_info_list_.push_back(test_info);
+ test_indices_.push_back(static_cast<int>(test_indices_.size()));
+}
+
+// Runs every test in this TestSuite.
+void TestSuite::Run() {
+ if (!should_run_) return;
+
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ impl->set_current_test_suite(this);
+
+ TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+ // Call both legacy and the new API
+ repeater->OnTestSuiteStart(*this);
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ repeater->OnTestCaseStart(*this);
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(
+ this, &TestSuite::RunSetUpTestSuite, "SetUpTestSuite()");
+
+ const bool skip_all = ad_hoc_test_result().Failed();
+
+ start_timestamp_ = internal::GetTimeInMillis();
+ internal::Timer timer;
+ for (int i = 0; i < total_test_count(); i++) {
+ if (skip_all) {
+ GetMutableTestInfo(i)->Skip();
+ } else {
+ GetMutableTestInfo(i)->Run();
+ }
+ if (GTEST_FLAG_GET(fail_fast) &&
+ GetMutableTestInfo(i)->result()->Failed()) {
+ for (int j = i + 1; j < total_test_count(); j++) {
+ GetMutableTestInfo(j)->Skip();
+ }
+ break;
+ }
+ }
+ elapsed_time_ = timer.Elapsed();
+
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(
+ this, &TestSuite::RunTearDownTestSuite, "TearDownTestSuite()");
+
+ // Call both legacy and the new API
+ repeater->OnTestSuiteEnd(*this);
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ repeater->OnTestCaseEnd(*this);
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ impl->set_current_test_suite(nullptr);
+}
+
+// Skips all tests under this TestSuite.
+void TestSuite::Skip() {
+ if (!should_run_) return;
+
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ impl->set_current_test_suite(this);
+
+ TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+ // Call both legacy and the new API
+ repeater->OnTestSuiteStart(*this);
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ repeater->OnTestCaseStart(*this);
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ for (int i = 0; i < total_test_count(); i++) {
+ GetMutableTestInfo(i)->Skip();
+ }
+
+ // Call both legacy and the new API
+ repeater->OnTestSuiteEnd(*this);
+ // Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ repeater->OnTestCaseEnd(*this);
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ impl->set_current_test_suite(nullptr);
+}
+
+// Clears the results of all tests in this test suite.
+void TestSuite::ClearResult() {
+ ad_hoc_test_result_.Clear();
+ ForEach(test_info_list_, TestInfo::ClearTestResult);
+}
+
+// Shuffles the tests in this test suite.
+void TestSuite::ShuffleTests(internal::Random* random) {
+ Shuffle(random, &test_indices_);
+}
+
+// Restores the test order to before the first shuffle.
+void TestSuite::UnshuffleTests() {
+ for (size_t i = 0; i < test_indices_.size(); i++) {
+ test_indices_[i] = static_cast<int>(i);
+ }
+}
+
+// Formats a countable noun. Depending on its quantity, either the
+// singular form or the plural form is used. e.g.
+//
+// FormatCountableNoun(1, "formula", "formuli") returns "1 formula".
+// FormatCountableNoun(5, "book", "books") returns "5 books".
+static std::string FormatCountableNoun(int count, const char* singular_form,
+ const char* plural_form) {
+ return internal::StreamableToString(count) + " " +
+ (count == 1 ? singular_form : plural_form);
+}
+
+// Formats the count of tests.
+static std::string FormatTestCount(int test_count) {
+ return FormatCountableNoun(test_count, "test", "tests");
+}
+
+// Formats the count of test suites.
+static std::string FormatTestSuiteCount(int test_suite_count) {
+ return FormatCountableNoun(test_suite_count, "test suite", "test suites");
+}
+
+// Converts a TestPartResult::Type enum to human-friendly string
+// representation. Both kNonFatalFailure and kFatalFailure are translated
+// to "Failure", as the user usually doesn't care about the difference
+// between the two when viewing the test result.
+static const char* TestPartResultTypeToString(TestPartResult::Type type) {
+ switch (type) {
+ case TestPartResult::kSkip:
+ return "Skipped\n";
+ case TestPartResult::kSuccess:
+ return "Success";
+
+ case TestPartResult::kNonFatalFailure:
+ case TestPartResult::kFatalFailure:
+#ifdef _MSC_VER
+ return "error: ";
+#else
+ return "Failure\n";
+#endif
+ default:
+ return "Unknown result type";
+ }
+}
+
+namespace internal {
+namespace {
+enum class GTestColor { kDefault, kRed, kGreen, kYellow };
+} // namespace
+
+// Prints a TestPartResult to an std::string.
+static std::string PrintTestPartResultToString(
+ const TestPartResult& test_part_result) {
+ return (Message() << internal::FormatFileLocation(
+ test_part_result.file_name(),
+ test_part_result.line_number())
+ << " "
+ << TestPartResultTypeToString(test_part_result.type())
+ << test_part_result.message())
+ .GetString();
+}
+
+// Prints a TestPartResult.
+static void PrintTestPartResult(const TestPartResult& test_part_result) {
+ const std::string& result = PrintTestPartResultToString(test_part_result);
+ printf("%s\n", result.c_str());
+ fflush(stdout);
+ // If the test program runs in Visual Studio or a debugger, the
+ // following statements add the test part result message to the Output
+ // window such that the user can double-click on it to jump to the
+ // corresponding source code location; otherwise they do nothing.
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+ // We don't call OutputDebugString*() on Windows Mobile, as printing
+ // to stdout is done by OutputDebugString() there already - we don't
+ // want the same message printed twice.
+ ::OutputDebugStringA(result.c_str());
+ ::OutputDebugStringA("\n");
+#endif
+}
+
+// class PrettyUnitTestResultPrinter
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && \
+ !GTEST_OS_WINDOWS_RT && !GTEST_OS_WINDOWS_MINGW
+
+// Returns the character attribute for the given color.
+static WORD GetColorAttribute(GTestColor color) {
+ switch (color) {
+ case GTestColor::kRed:
+ return FOREGROUND_RED;
+ case GTestColor::kGreen:
+ return FOREGROUND_GREEN;
+ case GTestColor::kYellow:
+ return FOREGROUND_RED | FOREGROUND_GREEN;
+ default:
+ return 0;
+ }
+}
+
+static int GetBitOffset(WORD color_mask) {
+ if (color_mask == 0) return 0;
+
+ int bitOffset = 0;
+ while ((color_mask & 1) == 0) {
+ color_mask >>= 1;
+ ++bitOffset;
+ }
+ return bitOffset;
+}
+
+static WORD GetNewColor(GTestColor color, WORD old_color_attrs) {
+ // Let's reuse the BG
+ static const WORD background_mask = BACKGROUND_BLUE | BACKGROUND_GREEN |
+ BACKGROUND_RED | BACKGROUND_INTENSITY;
+ static const WORD foreground_mask = FOREGROUND_BLUE | FOREGROUND_GREEN |
+ FOREGROUND_RED | FOREGROUND_INTENSITY;
+ const WORD existing_bg = old_color_attrs & background_mask;
+
+ WORD new_color =
+ GetColorAttribute(color) | existing_bg | FOREGROUND_INTENSITY;
+ static const int bg_bitOffset = GetBitOffset(background_mask);
+ static const int fg_bitOffset = GetBitOffset(foreground_mask);
+
+ if (((new_color & background_mask) >> bg_bitOffset) ==
+ ((new_color & foreground_mask) >> fg_bitOffset)) {
+ new_color ^= FOREGROUND_INTENSITY; // invert intensity
+ }
+ return new_color;
+}
+
+#else
+
+// Returns the ANSI color code for the given color. GTestColor::kDefault is
+// an invalid input.
+static const char* GetAnsiColorCode(GTestColor color) {
+ switch (color) {
+ case GTestColor::kRed:
+ return "1";
+ case GTestColor::kGreen:
+ return "2";
+ case GTestColor::kYellow:
+ return "3";
+ default:
+ return nullptr;
+ }
+}
+
+#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+
+// Returns true if and only if Google Test should use colors in the output.
+bool ShouldUseColor(bool stdout_is_tty) {
+ std::string c = GTEST_FLAG_GET(color);
+ const char* const gtest_color = c.c_str();
+
+ if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) {
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
+ // On Windows the TERM variable is usually not set, but the
+ // console there does support colors.
+ return stdout_is_tty;
+#else
+ // On non-Windows platforms, we rely on the TERM variable.
+ const char* const term = posix::GetEnv("TERM");
+ const bool term_supports_color =
+ String::CStringEquals(term, "xterm") ||
+ String::CStringEquals(term, "xterm-color") ||
+ String::CStringEquals(term, "xterm-256color") ||
+ String::CStringEquals(term, "screen") ||
+ String::CStringEquals(term, "screen-256color") ||
+ String::CStringEquals(term, "tmux") ||
+ String::CStringEquals(term, "tmux-256color") ||
+ String::CStringEquals(term, "rxvt-unicode") ||
+ String::CStringEquals(term, "rxvt-unicode-256color") ||
+ String::CStringEquals(term, "linux") ||
+ String::CStringEquals(term, "cygwin");
+ return stdout_is_tty && term_supports_color;
+#endif // GTEST_OS_WINDOWS
+ }
+
+ return String::CaseInsensitiveCStringEquals(gtest_color, "yes") ||
+ String::CaseInsensitiveCStringEquals(gtest_color, "true") ||
+ String::CaseInsensitiveCStringEquals(gtest_color, "t") ||
+ String::CStringEquals(gtest_color, "1");
+ // We take "yes", "true", "t", and "1" as meaning "yes". If the
+ // value is neither one of these nor "auto", we treat it as "no" to
+ // be conservative.
+}
+
+// Helpers for printing colored strings to stdout. Note that on Windows, we
+// cannot simply emit special characters and have the terminal change colors.
+// This routine must actually emit the characters rather than return a string
+// that would be colored when printed, as can be done on Linux.
+
+GTEST_ATTRIBUTE_PRINTF_(2, 3)
+static void ColoredPrintf(GTestColor color, const char* fmt, ...) {
+ va_list args;
+ va_start(args, fmt);
+
+ static const bool in_color_mode =
+ ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0);
+ const bool use_color = in_color_mode && (color != GTestColor::kDefault);
+
+ if (!use_color) {
+ vprintf(fmt, args);
+ va_end(args);
+ return;
+ }
+
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && \
+ !GTEST_OS_WINDOWS_RT && !GTEST_OS_WINDOWS_MINGW
+ const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
+
+ // Gets the current text color.
+ CONSOLE_SCREEN_BUFFER_INFO buffer_info;
+ GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
+ const WORD old_color_attrs = buffer_info.wAttributes;
+ const WORD new_color = GetNewColor(color, old_color_attrs);
+
+ // We need to flush the stream buffers into the console before each
+ // SetConsoleTextAttribute call lest it affect the text that is already
+ // printed but has not yet reached the console.
+ fflush(stdout);
+ SetConsoleTextAttribute(stdout_handle, new_color);
+
+ vprintf(fmt, args);
+
+ fflush(stdout);
+ // Restores the text color.
+ SetConsoleTextAttribute(stdout_handle, old_color_attrs);
+#else
+ printf("\033[0;3%sm", GetAnsiColorCode(color));
+ vprintf(fmt, args);
+ printf("\033[m"); // Resets the terminal to default.
+#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+ va_end(args);
+}
+
+// Text printed in Google Test's text output and --gtest_list_tests
+// output to label the type parameter and value parameter for a test.
+static const char kTypeParamLabel[] = "TypeParam";
+static const char kValueParamLabel[] = "GetParam()";
+
+static void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
+ const char* const type_param = test_info.type_param();
+ const char* const value_param = test_info.value_param();
+
+ if (type_param != nullptr || value_param != nullptr) {
+ printf(", where ");
+ if (type_param != nullptr) {
+ printf("%s = %s", kTypeParamLabel, type_param);
+ if (value_param != nullptr) printf(" and ");
+ }
+ if (value_param != nullptr) {
+ printf("%s = %s", kValueParamLabel, value_param);
+ }
+ }
+}
+
+// This class implements the TestEventListener interface.
+//
+// Class PrettyUnitTestResultPrinter is copyable.
+class PrettyUnitTestResultPrinter : public TestEventListener {
+ public:
+ PrettyUnitTestResultPrinter() {}
+ static void PrintTestName(const char* test_suite, const char* test) {
+ printf("%s.%s", test_suite, test);
+ }
+
+ // The following methods override what's in the TestEventListener class.
+ void OnTestProgramStart(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationStart(const UnitTest& unit_test, int iteration) override;
+ void OnEnvironmentsSetUpStart(const UnitTest& unit_test) override;
+ void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestCaseStart(const TestCase& test_case) override;
+#else
+ void OnTestSuiteStart(const TestSuite& test_suite) override;
+#endif // OnTestCaseStart
+
+ void OnTestStart(const TestInfo& test_info) override;
+ void OnTestDisabled(const TestInfo& test_info) override;
+
+ void OnTestPartResult(const TestPartResult& result) override;
+ void OnTestEnd(const TestInfo& test_info) override;
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestCaseEnd(const TestCase& test_case) override;
+#else
+ void OnTestSuiteEnd(const TestSuite& test_suite) override;
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ void OnEnvironmentsTearDownStart(const UnitTest& unit_test) override;
+ void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+ void OnTestProgramEnd(const UnitTest& /*unit_test*/) override {}
+
+ private:
+ static void PrintFailedTests(const UnitTest& unit_test);
+ static void PrintFailedTestSuites(const UnitTest& unit_test);
+ static void PrintSkippedTests(const UnitTest& unit_test);
+};
+
+// Fired before each iteration of tests starts.
+void PrettyUnitTestResultPrinter::OnTestIterationStart(
+ const UnitTest& unit_test, int iteration) {
+ if (GTEST_FLAG_GET(repeat) != 1)
+ printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1);
+
+ std::string f = GTEST_FLAG_GET(filter);
+ const char* const filter = f.c_str();
+
+ // Prints the filter if it's not *. This reminds the user that some
+ // tests may be skipped.
+ if (!String::CStringEquals(filter, kUniversalFilter)) {
+ ColoredPrintf(GTestColor::kYellow, "Note: %s filter = %s\n", GTEST_NAME_,
+ filter);
+ }
+
+ if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) {
+ const int32_t shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
+ ColoredPrintf(GTestColor::kYellow, "Note: This is test shard %d of %s.\n",
+ static_cast<int>(shard_index) + 1,
+ internal::posix::GetEnv(kTestTotalShards));
+ }
+
+ if (GTEST_FLAG_GET(shuffle)) {
+ ColoredPrintf(GTestColor::kYellow,
+ "Note: Randomizing tests' orders with a seed of %d .\n",
+ unit_test.random_seed());
+ }
+
+ ColoredPrintf(GTestColor::kGreen, "[==========] ");
+ printf("Running %s from %s.\n",
+ FormatTestCount(unit_test.test_to_run_count()).c_str(),
+ FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
+ fflush(stdout);
+}
+
+void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart(
+ const UnitTest& /*unit_test*/) {
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
+ printf("Global test environment set-up.\n");
+ fflush(stdout);
+}
+
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) {
+ const std::string counts =
+ FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
+ printf("%s from %s", counts.c_str(), test_case.name());
+ if (test_case.type_param() == nullptr) {
+ printf("\n");
+ } else {
+ printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param());
+ }
+ fflush(stdout);
+}
+#else
+void PrettyUnitTestResultPrinter::OnTestSuiteStart(
+ const TestSuite& test_suite) {
+ const std::string counts =
+ FormatCountableNoun(test_suite.test_to_run_count(), "test", "tests");
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
+ printf("%s from %s", counts.c_str(), test_suite.name());
+ if (test_suite.type_param() == nullptr) {
+ printf("\n");
+ } else {
+ printf(", where %s = %s\n", kTypeParamLabel, test_suite.type_param());
+ }
+ fflush(stdout);
+}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) {
+ ColoredPrintf(GTestColor::kGreen, "[ RUN ] ");
+ PrintTestName(test_info.test_suite_name(), test_info.name());
+ printf("\n");
+ fflush(stdout);
+}
+
+void PrettyUnitTestResultPrinter::OnTestDisabled(const TestInfo& test_info) {
+ ColoredPrintf(GTestColor::kYellow, "[ DISABLED ] ");
+ PrintTestName(test_info.test_suite_name(), test_info.name());
+ printf("\n");
+ fflush(stdout);
+}
+
+// Called after an assertion failure.
+void PrettyUnitTestResultPrinter::OnTestPartResult(
+ const TestPartResult& result) {
+ switch (result.type()) {
+ // If the test part succeeded, we don't need to do anything.
+ case TestPartResult::kSuccess:
+ return;
+ default:
+ // Print failure message from the assertion
+ // (e.g. expected this and got that).
+ PrintTestPartResult(result);
+ fflush(stdout);
+ }
+}
+
+void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
+ if (test_info.result()->Passed()) {
+ ColoredPrintf(GTestColor::kGreen, "[ OK ] ");
+ } else if (test_info.result()->Skipped()) {
+ ColoredPrintf(GTestColor::kGreen, "[ SKIPPED ] ");
+ } else {
+ ColoredPrintf(GTestColor::kRed, "[ FAILED ] ");
+ }
+ PrintTestName(test_info.test_suite_name(), test_info.name());
+ if (test_info.result()->Failed()) PrintFullTestCommentIfPresent(test_info);
+
+ if (GTEST_FLAG_GET(print_time)) {
+ printf(" (%s ms)\n",
+ internal::StreamableToString(test_info.result()->elapsed_time())
+ .c_str());
+ } else {
+ printf("\n");
+ }
+ fflush(stdout);
+}
+
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) {
+ if (!GTEST_FLAG_GET(print_time)) return;
+
+ const std::string counts =
+ FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
+ printf("%s from %s (%s ms total)\n\n", counts.c_str(), test_case.name(),
+ internal::StreamableToString(test_case.elapsed_time()).c_str());
+ fflush(stdout);
+}
+#else
+void PrettyUnitTestResultPrinter::OnTestSuiteEnd(const TestSuite& test_suite) {
+ if (!GTEST_FLAG_GET(print_time)) return;
+
+ const std::string counts =
+ FormatCountableNoun(test_suite.test_to_run_count(), "test", "tests");
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
+ printf("%s from %s (%s ms total)\n\n", counts.c_str(), test_suite.name(),
+ internal::StreamableToString(test_suite.elapsed_time()).c_str());
+ fflush(stdout);
+}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
+ const UnitTest& /*unit_test*/) {
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
+ printf("Global test environment tear-down\n");
+ fflush(stdout);
+}
+
+// Internal helper for printing the list of failed tests.
+void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) {
+ const int failed_test_count = unit_test.failed_test_count();
+ ColoredPrintf(GTestColor::kRed, "[ FAILED ] ");
+ printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str());
+
+ for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+ const TestSuite& test_suite = *unit_test.GetTestSuite(i);
+ if (!test_suite.should_run() || (test_suite.failed_test_count() == 0)) {
+ continue;
+ }
+ for (int j = 0; j < test_suite.total_test_count(); ++j) {
+ const TestInfo& test_info = *test_suite.GetTestInfo(j);
+ if (!test_info.should_run() || !test_info.result()->Failed()) {
+ continue;
+ }
+ ColoredPrintf(GTestColor::kRed, "[ FAILED ] ");
+ printf("%s.%s", test_suite.name(), test_info.name());
+ PrintFullTestCommentIfPresent(test_info);
+ printf("\n");
+ }
+ }
+ printf("\n%2d FAILED %s\n", failed_test_count,
+ failed_test_count == 1 ? "TEST" : "TESTS");
+}
+
+// Internal helper for printing the list of test suite failures not covered by
+// PrintFailedTests.
+void PrettyUnitTestResultPrinter::PrintFailedTestSuites(
+ const UnitTest& unit_test) {
+ int suite_failure_count = 0;
+ for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+ const TestSuite& test_suite = *unit_test.GetTestSuite(i);
+ if (!test_suite.should_run()) {
+ continue;
+ }
+ if (test_suite.ad_hoc_test_result().Failed()) {
+ ColoredPrintf(GTestColor::kRed, "[ FAILED ] ");
+ printf("%s: SetUpTestSuite or TearDownTestSuite\n", test_suite.name());
+ ++suite_failure_count;
+ }
+ }
+ if (suite_failure_count > 0) {
+ printf("\n%2d FAILED TEST %s\n", suite_failure_count,
+ suite_failure_count == 1 ? "SUITE" : "SUITES");
+ }
+}
+
+// Internal helper for printing the list of skipped tests.
+void PrettyUnitTestResultPrinter::PrintSkippedTests(const UnitTest& unit_test) {
+ const int skipped_test_count = unit_test.skipped_test_count();
+ if (skipped_test_count == 0) {
+ return;
+ }
+
+ for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+ const TestSuite& test_suite = *unit_test.GetTestSuite(i);
+ if (!test_suite.should_run() || (test_suite.skipped_test_count() == 0)) {
+ continue;
+ }
+ for (int j = 0; j < test_suite.total_test_count(); ++j) {
+ const TestInfo& test_info = *test_suite.GetTestInfo(j);
+ if (!test_info.should_run() || !test_info.result()->Skipped()) {
+ continue;
+ }
+ ColoredPrintf(GTestColor::kGreen, "[ SKIPPED ] ");
+ printf("%s.%s", test_suite.name(), test_info.name());
+ printf("\n");
+ }
+ }
+}
+
+void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
+ int /*iteration*/) {
+ ColoredPrintf(GTestColor::kGreen, "[==========] ");
+ printf("%s from %s ran.",
+ FormatTestCount(unit_test.test_to_run_count()).c_str(),
+ FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
+ if (GTEST_FLAG_GET(print_time)) {
+ printf(" (%s ms total)",
+ internal::StreamableToString(unit_test.elapsed_time()).c_str());
+ }
+ printf("\n");
+ ColoredPrintf(GTestColor::kGreen, "[ PASSED ] ");
+ printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());
+
+ const int skipped_test_count = unit_test.skipped_test_count();
+ if (skipped_test_count > 0) {
+ ColoredPrintf(GTestColor::kGreen, "[ SKIPPED ] ");
+ printf("%s, listed below:\n", FormatTestCount(skipped_test_count).c_str());
+ PrintSkippedTests(unit_test);
+ }
+
+ if (!unit_test.Passed()) {
+ PrintFailedTests(unit_test);
+ PrintFailedTestSuites(unit_test);
+ }
+
+ int num_disabled = unit_test.reportable_disabled_test_count();
+ if (num_disabled && !GTEST_FLAG_GET(also_run_disabled_tests)) {
+ if (unit_test.Passed()) {
+ printf("\n"); // Add a spacer if no FAILURE banner is displayed.
+ }
+ ColoredPrintf(GTestColor::kYellow, " YOU HAVE %d DISABLED %s\n\n",
+ num_disabled, num_disabled == 1 ? "TEST" : "TESTS");
+ }
+ // Ensure that Google Test output is printed before, e.g., heapchecker output.
+ fflush(stdout);
+}
+
+// End PrettyUnitTestResultPrinter
+
+// This class implements the TestEventListener interface.
+//
+// Class BriefUnitTestResultPrinter is copyable.
+class BriefUnitTestResultPrinter : public TestEventListener {
+ public:
+ BriefUnitTestResultPrinter() {}
+ static void PrintTestName(const char* test_suite, const char* test) {
+ printf("%s.%s", test_suite, test);
+ }
+
+ // The following methods override what's in the TestEventListener class.
+ void OnTestProgramStart(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationStart(const UnitTest& /*unit_test*/,
+ int /*iteration*/) override {}
+ void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) override {}
+ void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestCaseStart(const TestCase& /*test_case*/) override {}
+#else
+ void OnTestSuiteStart(const TestSuite& /*test_suite*/) override {}
+#endif // OnTestCaseStart
+
+ void OnTestStart(const TestInfo& /*test_info*/) override {}
+ void OnTestDisabled(const TestInfo& /*test_info*/) override {}
+
+ void OnTestPartResult(const TestPartResult& result) override;
+ void OnTestEnd(const TestInfo& test_info) override;
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestCaseEnd(const TestCase& /*test_case*/) override {}
+#else
+ void OnTestSuiteEnd(const TestSuite& /*test_suite*/) override {}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) override {}
+ void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+ void OnTestProgramEnd(const UnitTest& /*unit_test*/) override {}
+};
+
+// Called after an assertion failure.
+void BriefUnitTestResultPrinter::OnTestPartResult(
+ const TestPartResult& result) {
+ switch (result.type()) {
+ // If the test part succeeded, we don't need to do anything.
+ case TestPartResult::kSuccess:
+ return;
+ default:
+ // Print failure message from the assertion
+ // (e.g. expected this and got that).
+ PrintTestPartResult(result);
+ fflush(stdout);
+ }
+}
+
+void BriefUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
+ if (test_info.result()->Failed()) {
+ ColoredPrintf(GTestColor::kRed, "[ FAILED ] ");
+ PrintTestName(test_info.test_suite_name(), test_info.name());
+ PrintFullTestCommentIfPresent(test_info);
+
+ if (GTEST_FLAG_GET(print_time)) {
+ printf(" (%s ms)\n",
+ internal::StreamableToString(test_info.result()->elapsed_time())
+ .c_str());
+ } else {
+ printf("\n");
+ }
+ fflush(stdout);
+ }
+}
+
+void BriefUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
+ int /*iteration*/) {
+ ColoredPrintf(GTestColor::kGreen, "[==========] ");
+ printf("%s from %s ran.",
+ FormatTestCount(unit_test.test_to_run_count()).c_str(),
+ FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
+ if (GTEST_FLAG_GET(print_time)) {
+ printf(" (%s ms total)",
+ internal::StreamableToString(unit_test.elapsed_time()).c_str());
+ }
+ printf("\n");
+ ColoredPrintf(GTestColor::kGreen, "[ PASSED ] ");
+ printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());
+
+ const int skipped_test_count = unit_test.skipped_test_count();
+ if (skipped_test_count > 0) {
+ ColoredPrintf(GTestColor::kGreen, "[ SKIPPED ] ");
+ printf("%s.\n", FormatTestCount(skipped_test_count).c_str());
+ }
+
+ int num_disabled = unit_test.reportable_disabled_test_count();
+ if (num_disabled && !GTEST_FLAG_GET(also_run_disabled_tests)) {
+ if (unit_test.Passed()) {
+ printf("\n"); // Add a spacer if no FAILURE banner is displayed.
+ }
+ ColoredPrintf(GTestColor::kYellow, " YOU HAVE %d DISABLED %s\n\n",
+ num_disabled, num_disabled == 1 ? "TEST" : "TESTS");
+ }
+ // Ensure that Google Test output is printed before, e.g., heapchecker output.
+ fflush(stdout);
+}
+
+// End BriefUnitTestResultPrinter
+
+// class TestEventRepeater
+//
+// This class forwards events to other event listeners.
+class TestEventRepeater : public TestEventListener {
+ public:
+ TestEventRepeater() : forwarding_enabled_(true) {}
+ ~TestEventRepeater() override;
+ void Append(TestEventListener* listener);
+ TestEventListener* Release(TestEventListener* listener);
+
+ // Controls whether events will be forwarded to listeners_. Set to false
+ // in death test child processes.
+ bool forwarding_enabled() const { return forwarding_enabled_; }
+ void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; }
+
+ void OnTestProgramStart(const UnitTest& unit_test) override;
+ void OnTestIterationStart(const UnitTest& unit_test, int iteration) override;
+ void OnEnvironmentsSetUpStart(const UnitTest& unit_test) override;
+ void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) override;
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestCaseStart(const TestSuite& parameter) override;
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestSuiteStart(const TestSuite& parameter) override;
+ void OnTestStart(const TestInfo& test_info) override;
+ void OnTestDisabled(const TestInfo& test_info) override;
+ void OnTestPartResult(const TestPartResult& result) override;
+ void OnTestEnd(const TestInfo& test_info) override;
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestCaseEnd(const TestCase& parameter) override;
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestSuiteEnd(const TestSuite& parameter) override;
+ void OnEnvironmentsTearDownStart(const UnitTest& unit_test) override;
+ void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) override;
+ void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+ void OnTestProgramEnd(const UnitTest& unit_test) override;
+
+ private:
+ // Controls whether events will be forwarded to listeners_. Set to false
+ // in death test child processes.
+ bool forwarding_enabled_;
+ // The list of listeners that receive events.
+ std::vector<TestEventListener*> listeners_;
+
+ TestEventRepeater(const TestEventRepeater&) = delete;
+ TestEventRepeater& operator=(const TestEventRepeater&) = delete;
+};
+
+TestEventRepeater::~TestEventRepeater() {
+ ForEach(listeners_, Delete<TestEventListener>);
+}
+
+void TestEventRepeater::Append(TestEventListener* listener) {
+ listeners_.push_back(listener);
+}
+
+TestEventListener* TestEventRepeater::Release(TestEventListener* listener) {
+ for (size_t i = 0; i < listeners_.size(); ++i) {
+ if (listeners_[i] == listener) {
+ listeners_.erase(listeners_.begin() + static_cast<int>(i));
+ return listener;
+ }
+ }
+
+ return nullptr;
+}
+
+// Since most methods are very similar, use macros to reduce boilerplate.
+// This defines a member that forwards the call to all listeners.
+#define GTEST_REPEATER_METHOD_(Name, Type) \
+ void TestEventRepeater::Name(const Type& parameter) { \
+ if (forwarding_enabled_) { \
+ for (size_t i = 0; i < listeners_.size(); i++) { \
+ listeners_[i]->Name(parameter); \
+ } \
+ } \
+ }
+// This defines a member that forwards the call to all listeners in reverse
+// order.
+#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \
+ void TestEventRepeater::Name(const Type& parameter) { \
+ if (forwarding_enabled_) { \
+ for (size_t i = listeners_.size(); i != 0; i--) { \
+ listeners_[i - 1]->Name(parameter); \
+ } \
+ } \
+ }
+
+GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest)
+GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest)
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+GTEST_REPEATER_METHOD_(OnTestCaseStart, TestSuite)
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+GTEST_REPEATER_METHOD_(OnTestSuiteStart, TestSuite)
+GTEST_REPEATER_METHOD_(OnTestStart, TestInfo)
+GTEST_REPEATER_METHOD_(OnTestDisabled, TestInfo)
+GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult)
+GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest)
+GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest)
+GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest)
+GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo)
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestSuite)
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+GTEST_REVERSE_REPEATER_METHOD_(OnTestSuiteEnd, TestSuite)
+GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest)
+
+#undef GTEST_REPEATER_METHOD_
+#undef GTEST_REVERSE_REPEATER_METHOD_
+
+void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test,
+ int iteration) {
+ if (forwarding_enabled_) {
+ for (size_t i = 0; i < listeners_.size(); i++) {
+ listeners_[i]->OnTestIterationStart(unit_test, iteration);
+ }
+ }
+}
+
+void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test,
+ int iteration) {
+ if (forwarding_enabled_) {
+ for (size_t i = listeners_.size(); i > 0; i--) {
+ listeners_[i - 1]->OnTestIterationEnd(unit_test, iteration);
+ }
+ }
+}
+
+// End TestEventRepeater
+
+// This class generates an XML output file.
+class XmlUnitTestResultPrinter : public EmptyTestEventListener {
+ public:
+ explicit XmlUnitTestResultPrinter(const char* output_file);
+
+ void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+ void ListTestsMatchingFilter(const std::vector<TestSuite*>& test_suites);
+
+ // Prints an XML summary of all unit tests.
+ static void PrintXmlTestsList(std::ostream* stream,
+ const std::vector<TestSuite*>& test_suites);
+
+ private:
+ // Is c a whitespace character that is normalized to a space character
+ // when it appears in an XML attribute value?
+ static bool IsNormalizableWhitespace(unsigned char c) {
+ return c == '\t' || c == '\n' || c == '\r';
+ }
+
+ // May c appear in a well-formed XML document?
+ // https://www.w3.org/TR/REC-xml/#charsets
+ static bool IsValidXmlCharacter(unsigned char c) {
+ return IsNormalizableWhitespace(c) || c >= 0x20;
+ }
+
+ // Returns an XML-escaped copy of the input string str. If
+ // is_attribute is true, the text is meant to appear as an attribute
+ // value, and normalizable whitespace is preserved by replacing it
+ // with character references.
+ static std::string EscapeXml(const std::string& str, bool is_attribute);
+
+ // Returns the given string with all characters invalid in XML removed.
+ static std::string RemoveInvalidXmlCharacters(const std::string& str);
+
+ // Convenience wrapper around EscapeXml when str is an attribute value.
+ static std::string EscapeXmlAttribute(const std::string& str) {
+ return EscapeXml(str, true);
+ }
+
+ // Convenience wrapper around EscapeXml when str is not an attribute value.
+ static std::string EscapeXmlText(const char* str) {
+ return EscapeXml(str, false);
+ }
+
+ // Verifies that the given attribute belongs to the given element and
+ // streams the attribute as XML.
+ static void OutputXmlAttribute(std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name,
+ const std::string& value);
+
+ // Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
+ static void OutputXmlCDataSection(::std::ostream* stream, const char* data);
+
+ // Streams a test suite XML stanza containing the given test result.
+ //
+ // Requires: result.Failed()
+ static void OutputXmlTestSuiteForTestResult(::std::ostream* stream,
+ const TestResult& result);
+
+ // Streams an XML representation of a TestResult object.
+ static void OutputXmlTestResult(::std::ostream* stream,
+ const TestResult& result);
+
+ // Streams an XML representation of a TestInfo object.
+ static void OutputXmlTestInfo(::std::ostream* stream,
+ const char* test_suite_name,
+ const TestInfo& test_info);
+
+ // Prints an XML representation of a TestSuite object
+ static void PrintXmlTestSuite(::std::ostream* stream,
+ const TestSuite& test_suite);
+
+ // Prints an XML summary of unit_test to output stream out.
+ static void PrintXmlUnitTest(::std::ostream* stream,
+ const UnitTest& unit_test);
+
+ // Produces a string representing the test properties in a result as space
+ // delimited XML attributes based on the property key="value" pairs.
+ // When the std::string is not empty, it includes a space at the beginning,
+ // to delimit this attribute from prior attributes.
+ static std::string TestPropertiesAsXmlAttributes(const TestResult& result);
+
+ // Streams an XML representation of the test properties of a TestResult
+ // object.
+ static void OutputXmlTestProperties(std::ostream* stream,
+ const TestResult& result);
+
+ // The output file.
+ const std::string output_file_;
+
+ XmlUnitTestResultPrinter(const XmlUnitTestResultPrinter&) = delete;
+ XmlUnitTestResultPrinter& operator=(const XmlUnitTestResultPrinter&) = delete;
+};
+
+// Creates a new XmlUnitTestResultPrinter.
+XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file)
+ : output_file_(output_file) {
+ if (output_file_.empty()) {
+ GTEST_LOG_(FATAL) << "XML output file may not be null";
+ }
+}
+
+// Called after the unit test ends.
+void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
+ int /*iteration*/) {
+ FILE* xmlout = OpenFileForWriting(output_file_);
+ std::stringstream stream;
+ PrintXmlUnitTest(&stream, unit_test);
+ fprintf(xmlout, "%s", StringStreamToString(&stream).c_str());
+ fclose(xmlout);
+}
+
+void XmlUnitTestResultPrinter::ListTestsMatchingFilter(
+ const std::vector<TestSuite*>& test_suites) {
+ FILE* xmlout = OpenFileForWriting(output_file_);
+ std::stringstream stream;
+ PrintXmlTestsList(&stream, test_suites);
+ fprintf(xmlout, "%s", StringStreamToString(&stream).c_str());
+ fclose(xmlout);
+}
+
+// Returns an XML-escaped copy of the input string str. If is_attribute
+// is true, the text is meant to appear as an attribute value, and
+// normalizable whitespace is preserved by replacing it with character
+// references.
+//
+// Invalid XML characters in str, if any, are stripped from the output.
+// It is expected that most, if not all, of the text processed by this
+// module will consist of ordinary English text.
+// If this module is ever modified to produce version 1.1 XML output,
+// most invalid characters can be retained using character references.
+std::string XmlUnitTestResultPrinter::EscapeXml(const std::string& str,
+ bool is_attribute) {
+ Message m;
+
+ for (size_t i = 0; i < str.size(); ++i) {
+ const char ch = str[i];
+ switch (ch) {
+ case '<':
+ m << "&lt;";
+ break;
+ case '>':
+ m << "&gt;";
+ break;
+ case '&':
+ m << "&amp;";
+ break;
+ case '\'':
+ if (is_attribute)
+ m << "&apos;";
+ else
+ m << '\'';
+ break;
+ case '"':
+ if (is_attribute)
+ m << "&quot;";
+ else
+ m << '"';
+ break;
+ default:
+ if (IsValidXmlCharacter(static_cast<unsigned char>(ch))) {
+ if (is_attribute &&
+ IsNormalizableWhitespace(static_cast<unsigned char>(ch)))
+ m << "&#x" << String::FormatByte(static_cast<unsigned char>(ch))
+ << ";";
+ else
+ m << ch;
+ }
+ break;
+ }
+ }
+
+ return m.GetString();
+}
+
+// Returns the given string with all characters invalid in XML removed.
+// Currently invalid characters are dropped from the string. An
+// alternative is to replace them with certain characters such as . or ?.
+std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
+ const std::string& str) {
+ std::string output;
+ output.reserve(str.size());
+ for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
+ if (IsValidXmlCharacter(static_cast<unsigned char>(*it)))
+ output.push_back(*it);
+
+ return output;
+}
+
+// The following routines generate an XML representation of a UnitTest
+// object.
+//
+// This is how Google Test concepts map to the DTD:
+//
+// <testsuites name="AllTests"> <-- corresponds to a UnitTest object
+// <testsuite name="testcase-name"> <-- corresponds to a TestSuite object
+// <testcase name="test-name"> <-- corresponds to a TestInfo object
+// <failure message="...">...</failure>
+// <failure message="...">...</failure>
+// <failure message="...">...</failure>
+// <-- individual assertion failures
+// </testcase>
+// </testsuite>
+// </testsuites>
+
+// Formats the given time in milliseconds as seconds.
+std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) {
+ ::std::stringstream ss;
+ ss << (static_cast<double>(ms) * 1e-3);
+ return ss.str();
+}
+
+static bool PortableLocaltime(time_t seconds, struct tm* out) {
+#if defined(_MSC_VER)
+ return localtime_s(out, &seconds) == 0;
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+ // MINGW <time.h> provides neither localtime_r nor localtime_s, but uses
+ // Windows' localtime(), which has a thread-local tm buffer.
+ struct tm* tm_ptr = localtime(&seconds); // NOLINT
+ if (tm_ptr == nullptr) return false;
+ *out = *tm_ptr;
+ return true;
+#elif defined(__STDC_LIB_EXT1__)
+ // Uses localtime_s when available as localtime_r is only available from
+ // C23 standard.
+ return localtime_s(&seconds, out) != nullptr;
+#else
+ return localtime_r(&seconds, out) != nullptr;
+#endif
+}
+
+// Converts the given epoch time in milliseconds to a date string in the ISO
+// 8601 format, without the timezone information.
+std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) {
+ struct tm time_struct;
+ if (!PortableLocaltime(static_cast<time_t>(ms / 1000), &time_struct))
+ return "";
+ // YYYY-MM-DDThh:mm:ss.sss
+ return StreamableToString(time_struct.tm_year + 1900) + "-" +
+ String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" +
+ String::FormatIntWidth2(time_struct.tm_mday) + "T" +
+ String::FormatIntWidth2(time_struct.tm_hour) + ":" +
+ String::FormatIntWidth2(time_struct.tm_min) + ":" +
+ String::FormatIntWidth2(time_struct.tm_sec) + "." +
+ String::FormatIntWidthN(static_cast<int>(ms % 1000), 3);
+}
+
+// Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
+void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream,
+ const char* data) {
+ const char* segment = data;
+ *stream << "<![CDATA[";
+ for (;;) {
+ const char* const next_segment = strstr(segment, "]]>");
+ if (next_segment != nullptr) {
+ stream->write(segment,
+ static_cast<std::streamsize>(next_segment - segment));
+ *stream << "]]>]]&gt;<![CDATA[";
+ segment = next_segment + strlen("]]>");
+ } else {
+ *stream << segment;
+ break;
+ }
+ }
+ *stream << "]]>";
+}
+
+void XmlUnitTestResultPrinter::OutputXmlAttribute(
+ std::ostream* stream, const std::string& element_name,
+ const std::string& name, const std::string& value) {
+ const std::vector<std::string>& allowed_names =
+ GetReservedOutputAttributesForElement(element_name);
+
+ GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
+ allowed_names.end())
+ << "Attribute " << name << " is not allowed for element <" << element_name
+ << ">.";
+
+ *stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\"";
+}
+
+// Streams a test suite XML stanza containing the given test result.
+void XmlUnitTestResultPrinter::OutputXmlTestSuiteForTestResult(
+ ::std::ostream* stream, const TestResult& result) {
+ // Output the boilerplate for a minimal test suite with one test.
+ *stream << " <testsuite";
+ OutputXmlAttribute(stream, "testsuite", "name", "NonTestSuiteFailure");
+ OutputXmlAttribute(stream, "testsuite", "tests", "1");
+ OutputXmlAttribute(stream, "testsuite", "failures", "1");
+ OutputXmlAttribute(stream, "testsuite", "disabled", "0");
+ OutputXmlAttribute(stream, "testsuite", "skipped", "0");
+ OutputXmlAttribute(stream, "testsuite", "errors", "0");
+ OutputXmlAttribute(stream, "testsuite", "time",
+ FormatTimeInMillisAsSeconds(result.elapsed_time()));
+ OutputXmlAttribute(
+ stream, "testsuite", "timestamp",
+ FormatEpochTimeInMillisAsIso8601(result.start_timestamp()));
+ *stream << ">";
+
+ // Output the boilerplate for a minimal test case with a single test.
+ *stream << " <testcase";
+ OutputXmlAttribute(stream, "testcase", "name", "");
+ OutputXmlAttribute(stream, "testcase", "status", "run");
+ OutputXmlAttribute(stream, "testcase", "result", "completed");
+ OutputXmlAttribute(stream, "testcase", "classname", "");
+ OutputXmlAttribute(stream, "testcase", "time",
+ FormatTimeInMillisAsSeconds(result.elapsed_time()));
+ OutputXmlAttribute(
+ stream, "testcase", "timestamp",
+ FormatEpochTimeInMillisAsIso8601(result.start_timestamp()));
+
+ // Output the actual test result.
+ OutputXmlTestResult(stream, result);
+
+ // Complete the test suite.
+ *stream << " </testsuite>\n";
+}
+
+// Prints an XML representation of a TestInfo object.
+void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream,
+ const char* test_suite_name,
+ const TestInfo& test_info) {
+ const TestResult& result = *test_info.result();
+ const std::string kTestsuite = "testcase";
+
+ if (test_info.is_in_another_shard()) {
+ return;
+ }
+
+ *stream << " <testcase";
+ OutputXmlAttribute(stream, kTestsuite, "name", test_info.name());
+
+ if (test_info.value_param() != nullptr) {
+ OutputXmlAttribute(stream, kTestsuite, "value_param",
+ test_info.value_param());
+ }
+ if (test_info.type_param() != nullptr) {
+ OutputXmlAttribute(stream, kTestsuite, "type_param",
+ test_info.type_param());
+ }
+
+ OutputXmlAttribute(stream, kTestsuite, "file", test_info.file());
+ OutputXmlAttribute(stream, kTestsuite, "line",
+ StreamableToString(test_info.line()));
+ if (GTEST_FLAG_GET(list_tests)) {
+ *stream << " />\n";
+ return;
+ }
+
+ OutputXmlAttribute(stream, kTestsuite, "status",
+ test_info.should_run() ? "run" : "notrun");
+ OutputXmlAttribute(stream, kTestsuite, "result",
+ test_info.should_run()
+ ? (result.Skipped() ? "skipped" : "completed")
+ : "suppressed");
+ OutputXmlAttribute(stream, kTestsuite, "time",
+ FormatTimeInMillisAsSeconds(result.elapsed_time()));
+ OutputXmlAttribute(
+ stream, kTestsuite, "timestamp",
+ FormatEpochTimeInMillisAsIso8601(result.start_timestamp()));
+ OutputXmlAttribute(stream, kTestsuite, "classname", test_suite_name);
+
+ OutputXmlTestResult(stream, result);
+}
+
+void XmlUnitTestResultPrinter::OutputXmlTestResult(::std::ostream* stream,
+ const TestResult& result) {
+ int failures = 0;
+ int skips = 0;
+ for (int i = 0; i < result.total_part_count(); ++i) {
+ const TestPartResult& part = result.GetTestPartResult(i);
+ if (part.failed()) {
+ if (++failures == 1 && skips == 0) {
+ *stream << ">\n";
+ }
+ const std::string location =
+ internal::FormatCompilerIndependentFileLocation(part.file_name(),
+ part.line_number());
+ const std::string summary = location + "\n" + part.summary();
+ *stream << " <failure message=\"" << EscapeXmlAttribute(summary)
+ << "\" type=\"\">";
+ const std::string detail = location + "\n" + part.message();
+ OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
+ *stream << "</failure>\n";
+ } else if (part.skipped()) {
+ if (++skips == 1 && failures == 0) {
+ *stream << ">\n";
+ }
+ const std::string location =
+ internal::FormatCompilerIndependentFileLocation(part.file_name(),
+ part.line_number());
+ const std::string summary = location + "\n" + part.summary();
+ *stream << " <skipped message=\""
+ << EscapeXmlAttribute(summary.c_str()) << "\">";
+ const std::string detail = location + "\n" + part.message();
+ OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
+ *stream << "</skipped>\n";
+ }
+ }
+
+ if (failures == 0 && skips == 0 && result.test_property_count() == 0) {
+ *stream << " />\n";
+ } else {
+ if (failures == 0 && skips == 0) {
+ *stream << ">\n";
+ }
+ OutputXmlTestProperties(stream, result);
+ *stream << " </testcase>\n";
+ }
+}
+
+// Prints an XML representation of a TestSuite object
+void XmlUnitTestResultPrinter::PrintXmlTestSuite(std::ostream* stream,
+ const TestSuite& test_suite) {
+ const std::string kTestsuite = "testsuite";
+ *stream << " <" << kTestsuite;
+ OutputXmlAttribute(stream, kTestsuite, "name", test_suite.name());
+ OutputXmlAttribute(stream, kTestsuite, "tests",
+ StreamableToString(test_suite.reportable_test_count()));
+ if (!GTEST_FLAG_GET(list_tests)) {
+ OutputXmlAttribute(stream, kTestsuite, "failures",
+ StreamableToString(test_suite.failed_test_count()));
+ OutputXmlAttribute(
+ stream, kTestsuite, "disabled",
+ StreamableToString(test_suite.reportable_disabled_test_count()));
+ OutputXmlAttribute(stream, kTestsuite, "skipped",
+ StreamableToString(test_suite.skipped_test_count()));
+
+ OutputXmlAttribute(stream, kTestsuite, "errors", "0");
+
+ OutputXmlAttribute(stream, kTestsuite, "time",
+ FormatTimeInMillisAsSeconds(test_suite.elapsed_time()));
+ OutputXmlAttribute(
+ stream, kTestsuite, "timestamp",
+ FormatEpochTimeInMillisAsIso8601(test_suite.start_timestamp()));
+ *stream << TestPropertiesAsXmlAttributes(test_suite.ad_hoc_test_result());
+ }
+ *stream << ">\n";
+ for (int i = 0; i < test_suite.total_test_count(); ++i) {
+ if (test_suite.GetTestInfo(i)->is_reportable())
+ OutputXmlTestInfo(stream, test_suite.name(), *test_suite.GetTestInfo(i));
+ }
+ *stream << " </" << kTestsuite << ">\n";
+}
+
+// Prints an XML summary of unit_test to output stream out.
+void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream,
+ const UnitTest& unit_test) {
+ const std::string kTestsuites = "testsuites";
+
+ *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+ *stream << "<" << kTestsuites;
+
+ OutputXmlAttribute(stream, kTestsuites, "tests",
+ StreamableToString(unit_test.reportable_test_count()));
+ OutputXmlAttribute(stream, kTestsuites, "failures",
+ StreamableToString(unit_test.failed_test_count()));
+ OutputXmlAttribute(
+ stream, kTestsuites, "disabled",
+ StreamableToString(unit_test.reportable_disabled_test_count()));
+ OutputXmlAttribute(stream, kTestsuites, "errors", "0");
+ OutputXmlAttribute(stream, kTestsuites, "time",
+ FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));
+ OutputXmlAttribute(
+ stream, kTestsuites, "timestamp",
+ FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
+
+ if (GTEST_FLAG_GET(shuffle)) {
+ OutputXmlAttribute(stream, kTestsuites, "random_seed",
+ StreamableToString(unit_test.random_seed()));
+ }
+ *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result());
+
+ OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
+ *stream << ">\n";
+
+ for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+ if (unit_test.GetTestSuite(i)->reportable_test_count() > 0)
+ PrintXmlTestSuite(stream, *unit_test.GetTestSuite(i));
+ }
+
+ // If there was a test failure outside of one of the test suites (like in a
+ // test environment) include that in the output.
+ if (unit_test.ad_hoc_test_result().Failed()) {
+ OutputXmlTestSuiteForTestResult(stream, unit_test.ad_hoc_test_result());
+ }
+
+ *stream << "</" << kTestsuites << ">\n";
+}
+
+void XmlUnitTestResultPrinter::PrintXmlTestsList(
+ std::ostream* stream, const std::vector<TestSuite*>& test_suites) {
+ const std::string kTestsuites = "testsuites";
+
+ *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+ *stream << "<" << kTestsuites;
+
+ int total_tests = 0;
+ for (auto test_suite : test_suites) {
+ total_tests += test_suite->total_test_count();
+ }
+ OutputXmlAttribute(stream, kTestsuites, "tests",
+ StreamableToString(total_tests));
+ OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
+ *stream << ">\n";
+
+ for (auto test_suite : test_suites) {
+ PrintXmlTestSuite(stream, *test_suite);
+ }
+ *stream << "</" << kTestsuites << ">\n";
+}
+
+// Produces a string representing the test properties in a result as space
+// delimited XML attributes based on the property key="value" pairs.
+std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes(
+ const TestResult& result) {
+ Message attributes;
+ for (int i = 0; i < result.test_property_count(); ++i) {
+ const TestProperty& property = result.GetTestProperty(i);
+ attributes << " " << property.key() << "="
+ << "\"" << EscapeXmlAttribute(property.value()) << "\"";
+ }
+ return attributes.GetString();
+}
+
+void XmlUnitTestResultPrinter::OutputXmlTestProperties(
+ std::ostream* stream, const TestResult& result) {
+ const std::string kProperties = "properties";
+ const std::string kProperty = "property";
+
+ if (result.test_property_count() <= 0) {
+ return;
+ }
+
+ *stream << " <" << kProperties << ">\n";
+ for (int i = 0; i < result.test_property_count(); ++i) {
+ const TestProperty& property = result.GetTestProperty(i);
+ *stream << " <" << kProperty;
+ *stream << " name=\"" << EscapeXmlAttribute(property.key()) << "\"";
+ *stream << " value=\"" << EscapeXmlAttribute(property.value()) << "\"";
+ *stream << "/>\n";
+ }
+ *stream << " </" << kProperties << ">\n";
+}
+
+// End XmlUnitTestResultPrinter
+
+// This class generates an JSON output file.
+class JsonUnitTestResultPrinter : public EmptyTestEventListener {
+ public:
+ explicit JsonUnitTestResultPrinter(const char* output_file);
+
+ void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+
+ // Prints an JSON summary of all unit tests.
+ static void PrintJsonTestList(::std::ostream* stream,
+ const std::vector<TestSuite*>& test_suites);
+
+ private:
+ // Returns an JSON-escaped copy of the input string str.
+ static std::string EscapeJson(const std::string& str);
+
+ //// Verifies that the given attribute belongs to the given element and
+ //// streams the attribute as JSON.
+ static void OutputJsonKey(std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name, const std::string& value,
+ const std::string& indent, bool comma = true);
+ static void OutputJsonKey(std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name, int value,
+ const std::string& indent, bool comma = true);
+
+ // Streams a test suite JSON stanza containing the given test result.
+ //
+ // Requires: result.Failed()
+ static void OutputJsonTestSuiteForTestResult(::std::ostream* stream,
+ const TestResult& result);
+
+ // Streams a JSON representation of a TestResult object.
+ static void OutputJsonTestResult(::std::ostream* stream,
+ const TestResult& result);
+
+ // Streams a JSON representation of a TestInfo object.
+ static void OutputJsonTestInfo(::std::ostream* stream,
+ const char* test_suite_name,
+ const TestInfo& test_info);
+
+ // Prints a JSON representation of a TestSuite object
+ static void PrintJsonTestSuite(::std::ostream* stream,
+ const TestSuite& test_suite);
+
+ // Prints a JSON summary of unit_test to output stream out.
+ static void PrintJsonUnitTest(::std::ostream* stream,
+ const UnitTest& unit_test);
+
+ // Produces a string representing the test properties in a result as
+ // a JSON dictionary.
+ static std::string TestPropertiesAsJson(const TestResult& result,
+ const std::string& indent);
+
+ // The output file.
+ const std::string output_file_;
+
+ JsonUnitTestResultPrinter(const JsonUnitTestResultPrinter&) = delete;
+ JsonUnitTestResultPrinter& operator=(const JsonUnitTestResultPrinter&) =
+ delete;
+};
+
+// Creates a new JsonUnitTestResultPrinter.
+JsonUnitTestResultPrinter::JsonUnitTestResultPrinter(const char* output_file)
+ : output_file_(output_file) {
+ if (output_file_.empty()) {
+ GTEST_LOG_(FATAL) << "JSON output file may not be null";
+ }
+}
+
+void JsonUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
+ int /*iteration*/) {
+ FILE* jsonout = OpenFileForWriting(output_file_);
+ std::stringstream stream;
+ PrintJsonUnitTest(&stream, unit_test);
+ fprintf(jsonout, "%s", StringStreamToString(&stream).c_str());
+ fclose(jsonout);
+}
+
+// Returns an JSON-escaped copy of the input string str.
+std::string JsonUnitTestResultPrinter::EscapeJson(const std::string& str) {
+ Message m;
+
+ for (size_t i = 0; i < str.size(); ++i) {
+ const char ch = str[i];
+ switch (ch) {
+ case '\\':
+ case '"':
+ case '/':
+ m << '\\' << ch;
+ break;
+ case '\b':
+ m << "\\b";
+ break;
+ case '\t':
+ m << "\\t";
+ break;
+ case '\n':
+ m << "\\n";
+ break;
+ case '\f':
+ m << "\\f";
+ break;
+ case '\r':
+ m << "\\r";
+ break;
+ default:
+ if (ch < ' ') {
+ m << "\\u00" << String::FormatByte(static_cast<unsigned char>(ch));
+ } else {
+ m << ch;
+ }
+ break;
+ }
+ }
+
+ return m.GetString();
+}
+
+// The following routines generate an JSON representation of a UnitTest
+// object.
+
+// Formats the given time in milliseconds as seconds.
+static std::string FormatTimeInMillisAsDuration(TimeInMillis ms) {
+ ::std::stringstream ss;
+ ss << (static_cast<double>(ms) * 1e-3) << "s";
+ return ss.str();
+}
+
+// Converts the given epoch time in milliseconds to a date string in the
+// RFC3339 format, without the timezone information.
+static std::string FormatEpochTimeInMillisAsRFC3339(TimeInMillis ms) {
+ struct tm time_struct;
+ if (!PortableLocaltime(static_cast<time_t>(ms / 1000), &time_struct))
+ return "";
+ // YYYY-MM-DDThh:mm:ss
+ return StreamableToString(time_struct.tm_year + 1900) + "-" +
+ String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" +
+ String::FormatIntWidth2(time_struct.tm_mday) + "T" +
+ String::FormatIntWidth2(time_struct.tm_hour) + ":" +
+ String::FormatIntWidth2(time_struct.tm_min) + ":" +
+ String::FormatIntWidth2(time_struct.tm_sec) + "Z";
+}
+
+static inline std::string Indent(size_t width) {
+ return std::string(width, ' ');
+}
+
+void JsonUnitTestResultPrinter::OutputJsonKey(std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name,
+ const std::string& value,
+ const std::string& indent,
+ bool comma) {
+ const std::vector<std::string>& allowed_names =
+ GetReservedOutputAttributesForElement(element_name);
+
+ GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
+ allowed_names.end())
+ << "Key \"" << name << "\" is not allowed for value \"" << element_name
+ << "\".";
+
+ *stream << indent << "\"" << name << "\": \"" << EscapeJson(value) << "\"";
+ if (comma) *stream << ",\n";
+}
+
+void JsonUnitTestResultPrinter::OutputJsonKey(
+ std::ostream* stream, const std::string& element_name,
+ const std::string& name, int value, const std::string& indent, bool comma) {
+ const std::vector<std::string>& allowed_names =
+ GetReservedOutputAttributesForElement(element_name);
+
+ GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
+ allowed_names.end())
+ << "Key \"" << name << "\" is not allowed for value \"" << element_name
+ << "\".";
+
+ *stream << indent << "\"" << name << "\": " << StreamableToString(value);
+ if (comma) *stream << ",\n";
+}
+
+// Streams a test suite JSON stanza containing the given test result.
+void JsonUnitTestResultPrinter::OutputJsonTestSuiteForTestResult(
+ ::std::ostream* stream, const TestResult& result) {
+ // Output the boilerplate for a new test suite.
+ *stream << Indent(4) << "{\n";
+ OutputJsonKey(stream, "testsuite", "name", "NonTestSuiteFailure", Indent(6));
+ OutputJsonKey(stream, "testsuite", "tests", 1, Indent(6));
+ if (!GTEST_FLAG_GET(list_tests)) {
+ OutputJsonKey(stream, "testsuite", "failures", 1, Indent(6));
+ OutputJsonKey(stream, "testsuite", "disabled", 0, Indent(6));
+ OutputJsonKey(stream, "testsuite", "skipped", 0, Indent(6));
+ OutputJsonKey(stream, "testsuite", "errors", 0, Indent(6));
+ OutputJsonKey(stream, "testsuite", "time",
+ FormatTimeInMillisAsDuration(result.elapsed_time()),
+ Indent(6));
+ OutputJsonKey(stream, "testsuite", "timestamp",
+ FormatEpochTimeInMillisAsRFC3339(result.start_timestamp()),
+ Indent(6));
+ }
+ *stream << Indent(6) << "\"testsuite\": [\n";
+
+ // Output the boilerplate for a new test case.
+ *stream << Indent(8) << "{\n";
+ OutputJsonKey(stream, "testcase", "name", "", Indent(10));
+ OutputJsonKey(stream, "testcase", "status", "RUN", Indent(10));
+ OutputJsonKey(stream, "testcase", "result", "COMPLETED", Indent(10));
+ OutputJsonKey(stream, "testcase", "timestamp",
+ FormatEpochTimeInMillisAsRFC3339(result.start_timestamp()),
+ Indent(10));
+ OutputJsonKey(stream, "testcase", "time",
+ FormatTimeInMillisAsDuration(result.elapsed_time()),
+ Indent(10));
+ OutputJsonKey(stream, "testcase", "classname", "", Indent(10), false);
+ *stream << TestPropertiesAsJson(result, Indent(10));
+
+ // Output the actual test result.
+ OutputJsonTestResult(stream, result);
+
+ // Finish the test suite.
+ *stream << "\n" << Indent(6) << "]\n" << Indent(4) << "}";
+}
+
+// Prints a JSON representation of a TestInfo object.
+void JsonUnitTestResultPrinter::OutputJsonTestInfo(::std::ostream* stream,
+ const char* test_suite_name,
+ const TestInfo& test_info) {
+ const TestResult& result = *test_info.result();
+ const std::string kTestsuite = "testcase";
+ const std::string kIndent = Indent(10);
+
+ *stream << Indent(8) << "{\n";
+ OutputJsonKey(stream, kTestsuite, "name", test_info.name(), kIndent);
+
+ if (test_info.value_param() != nullptr) {
+ OutputJsonKey(stream, kTestsuite, "value_param", test_info.value_param(),
+ kIndent);
+ }
+ if (test_info.type_param() != nullptr) {
+ OutputJsonKey(stream, kTestsuite, "type_param", test_info.type_param(),
+ kIndent);
+ }
+
+ OutputJsonKey(stream, kTestsuite, "file", test_info.file(), kIndent);
+ OutputJsonKey(stream, kTestsuite, "line", test_info.line(), kIndent, false);
+ if (GTEST_FLAG_GET(list_tests)) {
+ *stream << "\n" << Indent(8) << "}";
+ return;
+ } else {
+ *stream << ",\n";
+ }
+
+ OutputJsonKey(stream, kTestsuite, "status",
+ test_info.should_run() ? "RUN" : "NOTRUN", kIndent);
+ OutputJsonKey(stream, kTestsuite, "result",
+ test_info.should_run()
+ ? (result.Skipped() ? "SKIPPED" : "COMPLETED")
+ : "SUPPRESSED",
+ kIndent);
+ OutputJsonKey(stream, kTestsuite, "timestamp",
+ FormatEpochTimeInMillisAsRFC3339(result.start_timestamp()),
+ kIndent);
+ OutputJsonKey(stream, kTestsuite, "time",
+ FormatTimeInMillisAsDuration(result.elapsed_time()), kIndent);
+ OutputJsonKey(stream, kTestsuite, "classname", test_suite_name, kIndent,
+ false);
+ *stream << TestPropertiesAsJson(result, kIndent);
+
+ OutputJsonTestResult(stream, result);
+}
+
+void JsonUnitTestResultPrinter::OutputJsonTestResult(::std::ostream* stream,
+ const TestResult& result) {
+ const std::string kIndent = Indent(10);
+
+ int failures = 0;
+ for (int i = 0; i < result.total_part_count(); ++i) {
+ const TestPartResult& part = result.GetTestPartResult(i);
+ if (part.failed()) {
+ *stream << ",\n";
+ if (++failures == 1) {
+ *stream << kIndent << "\""
+ << "failures"
+ << "\": [\n";
+ }
+ const std::string location =
+ internal::FormatCompilerIndependentFileLocation(part.file_name(),
+ part.line_number());
+ const std::string message = EscapeJson(location + "\n" + part.message());
+ *stream << kIndent << " {\n"
+ << kIndent << " \"failure\": \"" << message << "\",\n"
+ << kIndent << " \"type\": \"\"\n"
+ << kIndent << " }";
+ }
+ }
+
+ if (failures > 0) *stream << "\n" << kIndent << "]";
+ *stream << "\n" << Indent(8) << "}";
+}
+
+// Prints an JSON representation of a TestSuite object
+void JsonUnitTestResultPrinter::PrintJsonTestSuite(
+ std::ostream* stream, const TestSuite& test_suite) {
+ const std::string kTestsuite = "testsuite";
+ const std::string kIndent = Indent(6);
+
+ *stream << Indent(4) << "{\n";
+ OutputJsonKey(stream, kTestsuite, "name", test_suite.name(), kIndent);
+ OutputJsonKey(stream, kTestsuite, "tests", test_suite.reportable_test_count(),
+ kIndent);
+ if (!GTEST_FLAG_GET(list_tests)) {
+ OutputJsonKey(stream, kTestsuite, "failures",
+ test_suite.failed_test_count(), kIndent);
+ OutputJsonKey(stream, kTestsuite, "disabled",
+ test_suite.reportable_disabled_test_count(), kIndent);
+ OutputJsonKey(stream, kTestsuite, "errors", 0, kIndent);
+ OutputJsonKey(
+ stream, kTestsuite, "timestamp",
+ FormatEpochTimeInMillisAsRFC3339(test_suite.start_timestamp()),
+ kIndent);
+ OutputJsonKey(stream, kTestsuite, "time",
+ FormatTimeInMillisAsDuration(test_suite.elapsed_time()),
+ kIndent, false);
+ *stream << TestPropertiesAsJson(test_suite.ad_hoc_test_result(), kIndent)
+ << ",\n";
+ }
+
+ *stream << kIndent << "\"" << kTestsuite << "\": [\n";
+
+ bool comma = false;
+ for (int i = 0; i < test_suite.total_test_count(); ++i) {
+ if (test_suite.GetTestInfo(i)->is_reportable()) {
+ if (comma) {
+ *stream << ",\n";
+ } else {
+ comma = true;
+ }
+ OutputJsonTestInfo(stream, test_suite.name(), *test_suite.GetTestInfo(i));
+ }
+ }
+ *stream << "\n" << kIndent << "]\n" << Indent(4) << "}";
+}
+
+// Prints a JSON summary of unit_test to output stream out.
+void JsonUnitTestResultPrinter::PrintJsonUnitTest(std::ostream* stream,
+ const UnitTest& unit_test) {
+ const std::string kTestsuites = "testsuites";
+ const std::string kIndent = Indent(2);
+ *stream << "{\n";
+
+ OutputJsonKey(stream, kTestsuites, "tests", unit_test.reportable_test_count(),
+ kIndent);
+ OutputJsonKey(stream, kTestsuites, "failures", unit_test.failed_test_count(),
+ kIndent);
+ OutputJsonKey(stream, kTestsuites, "disabled",
+ unit_test.reportable_disabled_test_count(), kIndent);
+ OutputJsonKey(stream, kTestsuites, "errors", 0, kIndent);
+ if (GTEST_FLAG_GET(shuffle)) {
+ OutputJsonKey(stream, kTestsuites, "random_seed", unit_test.random_seed(),
+ kIndent);
+ }
+ OutputJsonKey(stream, kTestsuites, "timestamp",
+ FormatEpochTimeInMillisAsRFC3339(unit_test.start_timestamp()),
+ kIndent);
+ OutputJsonKey(stream, kTestsuites, "time",
+ FormatTimeInMillisAsDuration(unit_test.elapsed_time()), kIndent,
+ false);
+
+ *stream << TestPropertiesAsJson(unit_test.ad_hoc_test_result(), kIndent)
+ << ",\n";
+
+ OutputJsonKey(stream, kTestsuites, "name", "AllTests", kIndent);
+ *stream << kIndent << "\"" << kTestsuites << "\": [\n";
+
+ bool comma = false;
+ for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+ if (unit_test.GetTestSuite(i)->reportable_test_count() > 0) {
+ if (comma) {
+ *stream << ",\n";
+ } else {
+ comma = true;
+ }
+ PrintJsonTestSuite(stream, *unit_test.GetTestSuite(i));
+ }
+ }
+
+ // If there was a test failure outside of one of the test suites (like in a
+ // test environment) include that in the output.
+ if (unit_test.ad_hoc_test_result().Failed()) {
+ OutputJsonTestSuiteForTestResult(stream, unit_test.ad_hoc_test_result());
+ }
+
+ *stream << "\n"
+ << kIndent << "]\n"
+ << "}\n";
+}
+
+void JsonUnitTestResultPrinter::PrintJsonTestList(
+ std::ostream* stream, const std::vector<TestSuite*>& test_suites) {
+ const std::string kTestsuites = "testsuites";
+ const std::string kIndent = Indent(2);
+ *stream << "{\n";
+ int total_tests = 0;
+ for (auto test_suite : test_suites) {
+ total_tests += test_suite->total_test_count();
+ }
+ OutputJsonKey(stream, kTestsuites, "tests", total_tests, kIndent);
+
+ OutputJsonKey(stream, kTestsuites, "name", "AllTests", kIndent);
+ *stream << kIndent << "\"" << kTestsuites << "\": [\n";
+
+ for (size_t i = 0; i < test_suites.size(); ++i) {
+ if (i != 0) {
+ *stream << ",\n";
+ }
+ PrintJsonTestSuite(stream, *test_suites[i]);
+ }
+
+ *stream << "\n"
+ << kIndent << "]\n"
+ << "}\n";
+}
+// Produces a string representing the test properties in a result as
+// a JSON dictionary.
+std::string JsonUnitTestResultPrinter::TestPropertiesAsJson(
+ const TestResult& result, const std::string& indent) {
+ Message attributes;
+ for (int i = 0; i < result.test_property_count(); ++i) {
+ const TestProperty& property = result.GetTestProperty(i);
+ attributes << ",\n"
+ << indent << "\"" << property.key() << "\": "
+ << "\"" << EscapeJson(property.value()) << "\"";
+ }
+ return attributes.GetString();
+}
+
+// End JsonUnitTestResultPrinter
+
+#if GTEST_CAN_STREAM_RESULTS_
+
+// Checks if str contains '=', '&', '%' or '\n' characters. If yes,
+// replaces them by "%xx" where xx is their hexadecimal value. For
+// example, replaces "=" with "%3D". This algorithm is O(strlen(str))
+// in both time and space -- important as the input str may contain an
+// arbitrarily long test failure message and stack trace.
+std::string StreamingListener::UrlEncode(const char* str) {
+ std::string result;
+ result.reserve(strlen(str) + 1);
+ for (char ch = *str; ch != '\0'; ch = *++str) {
+ switch (ch) {
+ case '%':
+ case '=':
+ case '&':
+ case '\n':
+ result.append("%" + String::FormatByte(static_cast<unsigned char>(ch)));
+ break;
+ default:
+ result.push_back(ch);
+ break;
+ }
+ }
+ return result;
+}
+
+void StreamingListener::SocketWriter::MakeConnection() {
+ GTEST_CHECK_(sockfd_ == -1)
+ << "MakeConnection() can't be called when there is already a connection.";
+
+ addrinfo hints;
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC; // To allow both IPv4 and IPv6 addresses.
+ hints.ai_socktype = SOCK_STREAM;
+ addrinfo* servinfo = nullptr;
+
+ // Use the getaddrinfo() to get a linked list of IP addresses for
+ // the given host name.
+ const int error_num =
+ getaddrinfo(host_name_.c_str(), port_num_.c_str(), &hints, &servinfo);
+ if (error_num != 0) {
+ GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: "
+ << gai_strerror(error_num);
+ }
+
+ // Loop through all the results and connect to the first we can.
+ for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != nullptr;
+ cur_addr = cur_addr->ai_next) {
+ sockfd_ = socket(cur_addr->ai_family, cur_addr->ai_socktype,
+ cur_addr->ai_protocol);
+ if (sockfd_ != -1) {
+ // Connect the client socket to the server socket.
+ if (connect(sockfd_, cur_addr->ai_addr, cur_addr->ai_addrlen) == -1) {
+ close(sockfd_);
+ sockfd_ = -1;
+ }
+ }
+ }
+
+ freeaddrinfo(servinfo); // all done with this structure
+
+ if (sockfd_ == -1) {
+ GTEST_LOG_(WARNING) << "stream_result_to: failed to connect to "
+ << host_name_ << ":" << port_num_;
+ }
+}
+
+// End of class Streaming Listener
+#endif // GTEST_CAN_STREAM_RESULTS__
+
+// class OsStackTraceGetter
+
+const char* const OsStackTraceGetterInterface::kElidedFramesMarker =
+ "... " GTEST_NAME_ " internal frames ...";
+
+std::string OsStackTraceGetter::CurrentStackTrace(int max_depth, int skip_count)
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+#if GTEST_HAS_ABSL
+ std::string result;
+
+ if (max_depth <= 0) {
+ return result;
+ }
+
+ max_depth = std::min(max_depth, kMaxStackTraceDepth);
+
+ std::vector<void*> raw_stack(max_depth);
+ // Skips the frames requested by the caller, plus this function.
+ const int raw_stack_size =
+ absl::GetStackTrace(&raw_stack[0], max_depth, skip_count + 1);
+
+ void* caller_frame = nullptr;
+ {
+ MutexLock lock(&mutex_);
+ caller_frame = caller_frame_;
+ }
+
+ for (int i = 0; i < raw_stack_size; ++i) {
+ if (raw_stack[i] == caller_frame &&
+ !GTEST_FLAG_GET(show_internal_stack_frames)) {
+ // Add a marker to the trace and stop adding frames.
+ absl::StrAppend(&result, kElidedFramesMarker, "\n");
+ break;
+ }
+
+ char tmp[1024];
+ const char* symbol = "(unknown)";
+ if (absl::Symbolize(raw_stack[i], tmp, sizeof(tmp))) {
+ symbol = tmp;
+ }
+
+ char line[1024];
+ snprintf(line, sizeof(line), " %p: %s\n", raw_stack[i], symbol);
+ result += line;
+ }
+
+ return result;
+
+#else // !GTEST_HAS_ABSL
+ static_cast<void>(max_depth);
+ static_cast<void>(skip_count);
+ return "";
+#endif // GTEST_HAS_ABSL
+}
+
+void OsStackTraceGetter::UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_) {
+#if GTEST_HAS_ABSL
+ void* caller_frame = nullptr;
+ if (absl::GetStackTrace(&caller_frame, 1, 3) <= 0) {
+ caller_frame = nullptr;
+ }
+
+ MutexLock lock(&mutex_);
+ caller_frame_ = caller_frame;
+#endif // GTEST_HAS_ABSL
+}
+
+// A helper class that creates the premature-exit file in its
+// constructor and deletes the file in its destructor.
+class ScopedPrematureExitFile {
+ public:
+ explicit ScopedPrematureExitFile(const char* premature_exit_filepath)
+ : premature_exit_filepath_(
+ premature_exit_filepath ? premature_exit_filepath : "") {
+ // If a path to the premature-exit file is specified...
+ if (!premature_exit_filepath_.empty()) {
+ // create the file with a single "0" character in it. I/O
+ // errors are ignored as there's nothing better we can do and we
+ // don't want to fail the test because of this.
+ FILE* pfile = posix::FOpen(premature_exit_filepath_.c_str(), "w");
+ fwrite("0", 1, 1, pfile);
+ fclose(pfile);
+ }
+ }
+
+ ~ScopedPrematureExitFile() {
+#if !defined GTEST_OS_ESP8266
+ if (!premature_exit_filepath_.empty()) {
+ int retval = remove(premature_exit_filepath_.c_str());
+ if (retval) {
+ GTEST_LOG_(ERROR) << "Failed to remove premature exit filepath \""
+ << premature_exit_filepath_ << "\" with error "
+ << retval;
+ }
+ }
+#endif
+ }
+
+ private:
+ const std::string premature_exit_filepath_;
+
+ ScopedPrematureExitFile(const ScopedPrematureExitFile&) = delete;
+ ScopedPrematureExitFile& operator=(const ScopedPrematureExitFile&) = delete;
+};
+
+} // namespace internal
+
+// class TestEventListeners
+
+TestEventListeners::TestEventListeners()
+ : repeater_(new internal::TestEventRepeater()),
+ default_result_printer_(nullptr),
+ default_xml_generator_(nullptr) {}
+
+TestEventListeners::~TestEventListeners() { delete repeater_; }
+
+// Returns the standard listener responsible for the default console
+// output. Can be removed from the listeners list to shut down default
+// console output. Note that removing this object from the listener list
+// with Release transfers its ownership to the user.
+void TestEventListeners::Append(TestEventListener* listener) {
+ repeater_->Append(listener);
+}
+
+// Removes the given event listener from the list and returns it. It then
+// becomes the caller's responsibility to delete the listener. Returns
+// NULL if the listener is not found in the list.
+TestEventListener* TestEventListeners::Release(TestEventListener* listener) {
+ if (listener == default_result_printer_)
+ default_result_printer_ = nullptr;
+ else if (listener == default_xml_generator_)
+ default_xml_generator_ = nullptr;
+ return repeater_->Release(listener);
+}
+
+// Returns repeater that broadcasts the TestEventListener events to all
+// subscribers.
+TestEventListener* TestEventListeners::repeater() { return repeater_; }
+
+// Sets the default_result_printer attribute to the provided listener.
+// The listener is also added to the listener list and previous
+// default_result_printer is removed from it and deleted. The listener can
+// also be NULL in which case it will not be added to the list. Does
+// nothing if the previous and the current listener objects are the same.
+void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) {
+ if (default_result_printer_ != listener) {
+ // It is an error to pass this method a listener that is already in the
+ // list.
+ delete Release(default_result_printer_);
+ default_result_printer_ = listener;
+ if (listener != nullptr) Append(listener);
+ }
+}
+
+// Sets the default_xml_generator attribute to the provided listener. The
+// listener is also added to the listener list and previous
+// default_xml_generator is removed from it and deleted. The listener can
+// also be NULL in which case it will not be added to the list. Does
+// nothing if the previous and the current listener objects are the same.
+void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) {
+ if (default_xml_generator_ != listener) {
+ // It is an error to pass this method a listener that is already in the
+ // list.
+ delete Release(default_xml_generator_);
+ default_xml_generator_ = listener;
+ if (listener != nullptr) Append(listener);
+ }
+}
+
+// Controls whether events will be forwarded by the repeater to the
+// listeners in the list.
+bool TestEventListeners::EventForwardingEnabled() const {
+ return repeater_->forwarding_enabled();
+}
+
+void TestEventListeners::SuppressEventForwarding() {
+ repeater_->set_forwarding_enabled(false);
+}
+
+// class UnitTest
+
+// Gets the singleton UnitTest object. The first time this method is
+// called, a UnitTest object is constructed and returned. Consecutive
+// calls will return the same object.
+//
+// We don't protect this under mutex_ as a user is not supposed to
+// call this before main() starts, from which point on the return
+// value will never change.
+UnitTest* UnitTest::GetInstance() {
+ // CodeGear C++Builder insists on a public destructor for the
+ // default implementation. Use this implementation to keep good OO
+ // design with private destructor.
+
+#if defined(__BORLANDC__)
+ static UnitTest* const instance = new UnitTest;
+ return instance;
+#else
+ static UnitTest instance;
+ return &instance;
+#endif // defined(__BORLANDC__)
+}
+
+// Gets the number of successful test suites.
+int UnitTest::successful_test_suite_count() const {
+ return impl()->successful_test_suite_count();
+}
+
+// Gets the number of failed test suites.
+int UnitTest::failed_test_suite_count() const {
+ return impl()->failed_test_suite_count();
+}
+
+// Gets the number of all test suites.
+int UnitTest::total_test_suite_count() const {
+ return impl()->total_test_suite_count();
+}
+
+// Gets the number of all test suites that contain at least one test
+// that should run.
+int UnitTest::test_suite_to_run_count() const {
+ return impl()->test_suite_to_run_count();
+}
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+int UnitTest::successful_test_case_count() const {
+ return impl()->successful_test_suite_count();
+}
+int UnitTest::failed_test_case_count() const {
+ return impl()->failed_test_suite_count();
+}
+int UnitTest::total_test_case_count() const {
+ return impl()->total_test_suite_count();
+}
+int UnitTest::test_case_to_run_count() const {
+ return impl()->test_suite_to_run_count();
+}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+// Gets the number of successful tests.
+int UnitTest::successful_test_count() const {
+ return impl()->successful_test_count();
+}
+
+// Gets the number of skipped tests.
+int UnitTest::skipped_test_count() const {
+ return impl()->skipped_test_count();
+}
+
+// Gets the number of failed tests.
+int UnitTest::failed_test_count() const { return impl()->failed_test_count(); }
+
+// Gets the number of disabled tests that will be reported in the XML report.
+int UnitTest::reportable_disabled_test_count() const {
+ return impl()->reportable_disabled_test_count();
+}
+
+// Gets the number of disabled tests.
+int UnitTest::disabled_test_count() const {
+ return impl()->disabled_test_count();
+}
+
+// Gets the number of tests to be printed in the XML report.
+int UnitTest::reportable_test_count() const {
+ return impl()->reportable_test_count();
+}
+
+// Gets the number of all tests.
+int UnitTest::total_test_count() const { return impl()->total_test_count(); }
+
+// Gets the number of tests that should run.
+int UnitTest::test_to_run_count() const { return impl()->test_to_run_count(); }
+
+// Gets the time of the test program start, in ms from the start of the
+// UNIX epoch.
+internal::TimeInMillis UnitTest::start_timestamp() const {
+ return impl()->start_timestamp();
+}
+
+// Gets the elapsed time, in milliseconds.
+internal::TimeInMillis UnitTest::elapsed_time() const {
+ return impl()->elapsed_time();
+}
+
+// Returns true if and only if the unit test passed (i.e. all test suites
+// passed).
+bool UnitTest::Passed() const { return impl()->Passed(); }
+
+// Returns true if and only if the unit test failed (i.e. some test suite
+// failed or something outside of all tests failed).
+bool UnitTest::Failed() const { return impl()->Failed(); }
+
+// Gets the i-th test suite among all the test suites. i can range from 0 to
+// total_test_suite_count() - 1. If i is not in that range, returns NULL.
+const TestSuite* UnitTest::GetTestSuite(int i) const {
+ return impl()->GetTestSuite(i);
+}
+
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+const TestCase* UnitTest::GetTestCase(int i) const {
+ return impl()->GetTestCase(i);
+}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+// Returns the TestResult containing information on test failures and
+// properties logged outside of individual test suites.
+const TestResult& UnitTest::ad_hoc_test_result() const {
+ return *impl()->ad_hoc_test_result();
+}
+
+// Gets the i-th test suite among all the test suites. i can range from 0 to
+// total_test_suite_count() - 1. If i is not in that range, returns NULL.
+TestSuite* UnitTest::GetMutableTestSuite(int i) {
+ return impl()->GetMutableSuiteCase(i);
+}
+
+// Returns the list of event listeners that can be used to track events
+// inside Google Test.
+TestEventListeners& UnitTest::listeners() { return *impl()->listeners(); }
+
+// Registers and returns a global test environment. When a test
+// program is run, all global test environments will be set-up in the
+// order they were registered. After all tests in the program have
+// finished, all global test environments will be torn-down in the
+// *reverse* order they were registered.
+//
+// The UnitTest object takes ownership of the given environment.
+//
+// We don't protect this under mutex_, as we only support calling it
+// from the main thread.
+Environment* UnitTest::AddEnvironment(Environment* env) {
+ if (env == nullptr) {
+ return nullptr;
+ }
+
+ impl_->environments().push_back(env);
+ return env;
+}
+
+// Adds a TestPartResult to the current TestResult object. All Google Test
+// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call
+// this to report their results. The user code should use the
+// assertion macros instead of calling this directly.
+void UnitTest::AddTestPartResult(TestPartResult::Type result_type,
+ const char* file_name, int line_number,
+ const std::string& message,
+ const std::string& os_stack_trace)
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ Message msg;
+ msg << message;
+
+ internal::MutexLock lock(&mutex_);
+ if (impl_->gtest_trace_stack().size() > 0) {
+ msg << "\n" << GTEST_NAME_ << " trace:";
+
+ for (size_t i = impl_->gtest_trace_stack().size(); i > 0; --i) {
+ const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1];
+ msg << "\n"
+ << internal::FormatFileLocation(trace.file, trace.line) << " "
+ << trace.message;
+ }
+ }
+
+ if (os_stack_trace.c_str() != nullptr && !os_stack_trace.empty()) {
+ msg << internal::kStackTraceMarker << os_stack_trace;
+ }
+
+ const TestPartResult result = TestPartResult(
+ result_type, file_name, line_number, msg.GetString().c_str());
+ impl_->GetTestPartResultReporterForCurrentThread()->ReportTestPartResult(
+ result);
+
+ if (result_type != TestPartResult::kSuccess &&
+ result_type != TestPartResult::kSkip) {
+ // gtest_break_on_failure takes precedence over
+ // gtest_throw_on_failure. This allows a user to set the latter
+ // in the code (perhaps in order to use Google Test assertions
+ // with another testing framework) and specify the former on the
+ // command line for debugging.
+ if (GTEST_FLAG_GET(break_on_failure)) {
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
+ // Using DebugBreak on Windows allows gtest to still break into a debugger
+ // when a failure happens and both the --gtest_break_on_failure and
+ // the --gtest_catch_exceptions flags are specified.
+ DebugBreak();
+#elif (!defined(__native_client__)) && \
+ ((defined(__clang__) || defined(__GNUC__)) && \
+ (defined(__x86_64__) || defined(__i386__)))
+ // with clang/gcc we can achieve the same effect on x86 by invoking int3
+ asm("int3");
+#else
+ // Dereference nullptr through a volatile pointer to prevent the compiler
+ // from removing. We use this rather than abort() or __builtin_trap() for
+ // portability: some debuggers don't correctly trap abort().
+ *static_cast<volatile int*>(nullptr) = 1;
+#endif // GTEST_OS_WINDOWS
+ } else if (GTEST_FLAG_GET(throw_on_failure)) {
+#if GTEST_HAS_EXCEPTIONS
+ throw internal::GoogleTestFailureException(result);
+#else
+ // We cannot call abort() as it generates a pop-up in debug mode
+ // that cannot be suppressed in VC 7.1 or below.
+ exit(1);
+#endif
+ }
+ }
+}
+
+// Adds a TestProperty to the current TestResult object when invoked from
+// inside a test, to current TestSuite's ad_hoc_test_result_ when invoked
+// from SetUpTestSuite or TearDownTestSuite, or to the global property set
+// when invoked elsewhere. If the result already contains a property with
+// the same key, the value will be updated.
+void UnitTest::RecordProperty(const std::string& key,
+ const std::string& value) {
+ impl_->RecordProperty(TestProperty(key, value));
+}
+
+// Runs all tests in this UnitTest object and prints the result.
+// Returns 0 if successful, or 1 otherwise.
+//
+// We don't protect this under mutex_, as we only support calling it
+// from the main thread.
+int UnitTest::Run() {
+ const bool in_death_test_child_process =
+ GTEST_FLAG_GET(internal_run_death_test).length() > 0;
+
+ // Google Test implements this protocol for catching that a test
+ // program exits before returning control to Google Test:
+ //
+ // 1. Upon start, Google Test creates a file whose absolute path
+ // is specified by the environment variable
+ // TEST_PREMATURE_EXIT_FILE.
+ // 2. When Google Test has finished its work, it deletes the file.
+ //
+ // This allows a test runner to set TEST_PREMATURE_EXIT_FILE before
+ // running a Google-Test-based test program and check the existence
+ // of the file at the end of the test execution to see if it has
+ // exited prematurely.
+
+ // If we are in the child process of a death test, don't
+ // create/delete the premature exit file, as doing so is unnecessary
+ // and will confuse the parent process. Otherwise, create/delete
+ // the file upon entering/leaving this function. If the program
+ // somehow exits before this function has a chance to return, the
+ // premature-exit file will be left undeleted, causing a test runner
+ // that understands the premature-exit-file protocol to report the
+ // test as having failed.
+ const internal::ScopedPrematureExitFile premature_exit_file(
+ in_death_test_child_process
+ ? nullptr
+ : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE"));
+
+ // Captures the value of GTEST_FLAG(catch_exceptions). This value will be
+ // used for the duration of the program.
+ impl()->set_catch_exceptions(GTEST_FLAG_GET(catch_exceptions));
+
+#if GTEST_OS_WINDOWS
+ // Either the user wants Google Test to catch exceptions thrown by the
+ // tests or this is executing in the context of death test child
+ // process. In either case the user does not want to see pop-up dialogs
+ // about crashes - they are expected.
+ if (impl()->catch_exceptions() || in_death_test_child_process) {
+#if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
+ // SetErrorMode doesn't exist on CE.
+ SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT |
+ SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX);
+#endif // !GTEST_OS_WINDOWS_MOBILE
+
+#if (defined(_MSC_VER) || GTEST_OS_WINDOWS_MINGW) && !GTEST_OS_WINDOWS_MOBILE
+ // Death test children can be terminated with _abort(). On Windows,
+ // _abort() can show a dialog with a warning message. This forces the
+ // abort message to go to stderr instead.
+ _set_error_mode(_OUT_TO_STDERR);
+#endif
+
+#if defined(_MSC_VER) && !GTEST_OS_WINDOWS_MOBILE
+ // In the debug version, Visual Studio pops up a separate dialog
+ // offering a choice to debug the aborted program. We need to suppress
+ // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement
+ // executed. Google Test will notify the user of any unexpected
+ // failure via stderr.
+ if (!GTEST_FLAG_GET(break_on_failure))
+ _set_abort_behavior(
+ 0x0, // Clear the following flags:
+ _WRITE_ABORT_MSG | _CALL_REPORTFAULT); // pop-up window, core dump.
+
+ // In debug mode, the Windows CRT can crash with an assertion over invalid
+ // input (e.g. passing an invalid file descriptor). The default handling
+ // for these assertions is to pop up a dialog and wait for user input.
+ // Instead ask the CRT to dump such assertions to stderr non-interactively.
+ if (!IsDebuggerPresent()) {
+ (void)_CrtSetReportMode(_CRT_ASSERT,
+ _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+ (void)_CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
+ }
+#endif
+ }
+#endif // GTEST_OS_WINDOWS
+
+ return internal::HandleExceptionsInMethodIfSupported(
+ impl(), &internal::UnitTestImpl::RunAllTests,
+ "auxiliary test code (environments or event listeners)")
+ ? 0
+ : 1;
+}
+
+// Returns the working directory when the first TEST() or TEST_F() was
+// executed.
+const char* UnitTest::original_working_dir() const {
+ return impl_->original_working_dir_.c_str();
+}
+
+// Returns the TestSuite object for the test that's currently running,
+// or NULL if no test is running.
+const TestSuite* UnitTest::current_test_suite() const
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ internal::MutexLock lock(&mutex_);
+ return impl_->current_test_suite();
+}
+
+// Legacy API is still available but deprecated
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+const TestCase* UnitTest::current_test_case() const
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ internal::MutexLock lock(&mutex_);
+ return impl_->current_test_suite();
+}
+#endif
+
+// Returns the TestInfo object for the test that's currently running,
+// or NULL if no test is running.
+const TestInfo* UnitTest::current_test_info() const
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ internal::MutexLock lock(&mutex_);
+ return impl_->current_test_info();
+}
+
+// Returns the random seed used at the start of the current test run.
+int UnitTest::random_seed() const { return impl_->random_seed(); }
+
+// Returns ParameterizedTestSuiteRegistry object used to keep track of
+// value-parameterized tests and instantiate and register them.
+internal::ParameterizedTestSuiteRegistry&
+UnitTest::parameterized_test_registry() GTEST_LOCK_EXCLUDED_(mutex_) {
+ return impl_->parameterized_test_registry();
+}
+
+// Creates an empty UnitTest.
+UnitTest::UnitTest() { impl_ = new internal::UnitTestImpl(this); }
+
+// Destructor of UnitTest.
+UnitTest::~UnitTest() { delete impl_; }
+
+// Pushes a trace defined by SCOPED_TRACE() on to the per-thread
+// Google Test trace stack.
+void UnitTest::PushGTestTrace(const internal::TraceInfo& trace)
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ internal::MutexLock lock(&mutex_);
+ impl_->gtest_trace_stack().push_back(trace);
+}
+
+// Pops a trace from the per-thread Google Test trace stack.
+void UnitTest::PopGTestTrace() GTEST_LOCK_EXCLUDED_(mutex_) {
+ internal::MutexLock lock(&mutex_);
+ impl_->gtest_trace_stack().pop_back();
+}
+
+namespace internal {
+
+UnitTestImpl::UnitTestImpl(UnitTest* parent)
+ : parent_(parent),
+ GTEST_DISABLE_MSC_WARNINGS_PUSH_(4355 /* using this in initializer */)
+ default_global_test_part_result_reporter_(this),
+ default_per_thread_test_part_result_reporter_(this),
+ GTEST_DISABLE_MSC_WARNINGS_POP_() global_test_part_result_repoter_(
+ &default_global_test_part_result_reporter_),
+ per_thread_test_part_result_reporter_(
+ &default_per_thread_test_part_result_reporter_),
+ parameterized_test_registry_(),
+ parameterized_tests_registered_(false),
+ last_death_test_suite_(-1),
+ current_test_suite_(nullptr),
+ current_test_info_(nullptr),
+ ad_hoc_test_result_(),
+ os_stack_trace_getter_(nullptr),
+ post_flag_parse_init_performed_(false),
+ random_seed_(0), // Will be overridden by the flag before first use.
+ random_(0), // Will be reseeded before first use.
+ start_timestamp_(0),
+ elapsed_time_(0),
+#if GTEST_HAS_DEATH_TEST
+ death_test_factory_(new DefaultDeathTestFactory),
+#endif
+ // Will be overridden by the flag before first use.
+ catch_exceptions_(false) {
+ listeners()->SetDefaultResultPrinter(new PrettyUnitTestResultPrinter);
+}
+
+UnitTestImpl::~UnitTestImpl() {
+ // Deletes every TestSuite.
+ ForEach(test_suites_, internal::Delete<TestSuite>);
+
+ // Deletes every Environment.
+ ForEach(environments_, internal::Delete<Environment>);
+
+ delete os_stack_trace_getter_;
+}
+
+// Adds a TestProperty to the current TestResult object when invoked in a
+// context of a test, to current test suite's ad_hoc_test_result when invoke
+// from SetUpTestSuite/TearDownTestSuite, or to the global property set
+// otherwise. If the result already contains a property with the same key,
+// the value will be updated.
+void UnitTestImpl::RecordProperty(const TestProperty& test_property) {
+ std::string xml_element;
+ TestResult* test_result; // TestResult appropriate for property recording.
+
+ if (current_test_info_ != nullptr) {
+ xml_element = "testcase";
+ test_result = &(current_test_info_->result_);
+ } else if (current_test_suite_ != nullptr) {
+ xml_element = "testsuite";
+ test_result = &(current_test_suite_->ad_hoc_test_result_);
+ } else {
+ xml_element = "testsuites";
+ test_result = &ad_hoc_test_result_;
+ }
+ test_result->RecordProperty(xml_element, test_property);
+}
+
+#if GTEST_HAS_DEATH_TEST
+// Disables event forwarding if the control is currently in a death test
+// subprocess. Must not be called before InitGoogleTest.
+void UnitTestImpl::SuppressTestEventsIfInSubprocess() {
+ if (internal_run_death_test_flag_.get() != nullptr)
+ listeners()->SuppressEventForwarding();
+}
+#endif // GTEST_HAS_DEATH_TEST
+
+// Initializes event listeners performing XML output as specified by
+// UnitTestOptions. Must not be called before InitGoogleTest.
+void UnitTestImpl::ConfigureXmlOutput() {
+ const std::string& output_format = UnitTestOptions::GetOutputFormat();
+ if (output_format == "xml") {
+ listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter(
+ UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
+ } else if (output_format == "json") {
+ listeners()->SetDefaultXmlGenerator(new JsonUnitTestResultPrinter(
+ UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
+ } else if (output_format != "") {
+ GTEST_LOG_(WARNING) << "WARNING: unrecognized output format \""
+ << output_format << "\" ignored.";
+ }
+}
+
+#if GTEST_CAN_STREAM_RESULTS_
+// Initializes event listeners for streaming test results in string form.
+// Must not be called before InitGoogleTest.
+void UnitTestImpl::ConfigureStreamingOutput() {
+ const std::string& target = GTEST_FLAG_GET(stream_result_to);
+ if (!target.empty()) {
+ const size_t pos = target.find(':');
+ if (pos != std::string::npos) {
+ listeners()->Append(
+ new StreamingListener(target.substr(0, pos), target.substr(pos + 1)));
+ } else {
+ GTEST_LOG_(WARNING) << "unrecognized streaming target \"" << target
+ << "\" ignored.";
+ }
+ }
+}
+#endif // GTEST_CAN_STREAM_RESULTS_
+
+// Performs initialization dependent upon flag values obtained in
+// ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to
+// ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest
+// this function is also called from RunAllTests. Since this function can be
+// called more than once, it has to be idempotent.
+void UnitTestImpl::PostFlagParsingInit() {
+ // Ensures that this function does not execute more than once.
+ if (!post_flag_parse_init_performed_) {
+ post_flag_parse_init_performed_ = true;
+
+#if defined(GTEST_CUSTOM_TEST_EVENT_LISTENER_)
+ // Register to send notifications about key process state changes.
+ listeners()->Append(new GTEST_CUSTOM_TEST_EVENT_LISTENER_());
+#endif // defined(GTEST_CUSTOM_TEST_EVENT_LISTENER_)
+
+#if GTEST_HAS_DEATH_TEST
+ InitDeathTestSubprocessControlInfo();
+ SuppressTestEventsIfInSubprocess();
+#endif // GTEST_HAS_DEATH_TEST
+
+ // Registers parameterized tests. This makes parameterized tests
+ // available to the UnitTest reflection API without running
+ // RUN_ALL_TESTS.
+ RegisterParameterizedTests();
+
+ // Configures listeners for XML output. This makes it possible for users
+ // to shut down the default XML output before invoking RUN_ALL_TESTS.
+ ConfigureXmlOutput();
+
+ if (GTEST_FLAG_GET(brief)) {
+ listeners()->SetDefaultResultPrinter(new BriefUnitTestResultPrinter);
+ }
+
+#if GTEST_CAN_STREAM_RESULTS_
+ // Configures listeners for streaming test results to the specified server.
+ ConfigureStreamingOutput();
+#endif // GTEST_CAN_STREAM_RESULTS_
+
+#if GTEST_HAS_ABSL
+ if (GTEST_FLAG_GET(install_failure_signal_handler)) {
+ absl::FailureSignalHandlerOptions options;
+ absl::InstallFailureSignalHandler(options);
+ }
+#endif // GTEST_HAS_ABSL
+ }
+}
+
+// A predicate that checks the name of a TestSuite against a known
+// value.
+//
+// This is used for implementation of the UnitTest class only. We put
+// it in the anonymous namespace to prevent polluting the outer
+// namespace.
+//
+// TestSuiteNameIs is copyable.
+class TestSuiteNameIs {
+ public:
+ // Constructor.
+ explicit TestSuiteNameIs(const std::string& name) : name_(name) {}
+
+ // Returns true if and only if the name of test_suite matches name_.
+ bool operator()(const TestSuite* test_suite) const {
+ return test_suite != nullptr &&
+ strcmp(test_suite->name(), name_.c_str()) == 0;
+ }
+
+ private:
+ std::string name_;
+};
+
+// Finds and returns a TestSuite with the given name. If one doesn't
+// exist, creates one and returns it. It's the CALLER'S
+// RESPONSIBILITY to ensure that this function is only called WHEN THE
+// TESTS ARE NOT SHUFFLED.
+//
+// Arguments:
+//
+// test_suite_name: name of the test suite
+// type_param: the name of the test suite's type parameter, or NULL if
+// this is not a typed or a type-parameterized test suite.
+// set_up_tc: pointer to the function that sets up the test suite
+// tear_down_tc: pointer to the function that tears down the test suite
+TestSuite* UnitTestImpl::GetTestSuite(
+ const char* test_suite_name, const char* type_param,
+ internal::SetUpTestSuiteFunc set_up_tc,
+ internal::TearDownTestSuiteFunc tear_down_tc) {
+ // Can we find a TestSuite with the given name?
+ const auto test_suite =
+ std::find_if(test_suites_.rbegin(), test_suites_.rend(),
+ TestSuiteNameIs(test_suite_name));
+
+ if (test_suite != test_suites_.rend()) return *test_suite;
+
+ // No. Let's create one.
+ auto* const new_test_suite =
+ new TestSuite(test_suite_name, type_param, set_up_tc, tear_down_tc);
+
+ const UnitTestFilter death_test_suite_filter(kDeathTestSuiteFilter);
+ // Is this a death test suite?
+ if (death_test_suite_filter.MatchesName(test_suite_name)) {
+ // Yes. Inserts the test suite after the last death test suite
+ // defined so far. This only works when the test suites haven't
+ // been shuffled. Otherwise we may end up running a death test
+ // after a non-death test.
+ ++last_death_test_suite_;
+ test_suites_.insert(test_suites_.begin() + last_death_test_suite_,
+ new_test_suite);
+ } else {
+ // No. Appends to the end of the list.
+ test_suites_.push_back(new_test_suite);
+ }
+
+ test_suite_indices_.push_back(static_cast<int>(test_suite_indices_.size()));
+ return new_test_suite;
+}
+
+// Helpers for setting up / tearing down the given environment. They
+// are for use in the ForEach() function.
+static void SetUpEnvironment(Environment* env) { env->SetUp(); }
+static void TearDownEnvironment(Environment* env) { env->TearDown(); }
+
+// Runs all tests in this UnitTest object, prints the result, and
+// returns true if all tests are successful. If any exception is
+// thrown during a test, the test is considered to be failed, but the
+// rest of the tests will still be run.
+//
+// When parameterized tests are enabled, it expands and registers
+// parameterized tests first in RegisterParameterizedTests().
+// All other functions called from RunAllTests() may safely assume that
+// parameterized tests are ready to be counted and run.
+bool UnitTestImpl::RunAllTests() {
+ // True if and only if Google Test is initialized before RUN_ALL_TESTS() is
+ // called.
+ const bool gtest_is_initialized_before_run_all_tests = GTestIsInitialized();
+
+ // Do not run any test if the --help flag was specified.
+ if (g_help_flag) return true;
+
+ // Repeats the call to the post-flag parsing initialization in case the
+ // user didn't call InitGoogleTest.
+ PostFlagParsingInit();
+
+ // Even if sharding is not on, test runners may want to use the
+ // GTEST_SHARD_STATUS_FILE to query whether the test supports the sharding
+ // protocol.
+ internal::WriteToShardStatusFileIfNeeded();
+
+ // True if and only if we are in a subprocess for running a thread-safe-style
+ // death test.
+ bool in_subprocess_for_death_test = false;
+
+#if GTEST_HAS_DEATH_TEST
+ in_subprocess_for_death_test =
+ (internal_run_death_test_flag_.get() != nullptr);
+#if defined(GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_)
+ if (in_subprocess_for_death_test) {
+ GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_();
+ }
+#endif // defined(GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_)
+#endif // GTEST_HAS_DEATH_TEST
+
+ const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex,
+ in_subprocess_for_death_test);
+
+ // Compares the full test names with the filter to decide which
+ // tests to run.
+ const bool has_tests_to_run =
+ FilterTests(should_shard ? HONOR_SHARDING_PROTOCOL
+ : IGNORE_SHARDING_PROTOCOL) > 0;
+
+ // Lists the tests and exits if the --gtest_list_tests flag was specified.
+ if (GTEST_FLAG_GET(list_tests)) {
+ // This must be called *after* FilterTests() has been called.
+ ListTestsMatchingFilter();
+ return true;
+ }
+
+ random_seed_ = GetRandomSeedFromFlag(GTEST_FLAG_GET(random_seed));
+
+ // True if and only if at least one test has failed.
+ bool failed = false;
+
+ TestEventListener* repeater = listeners()->repeater();
+
+ start_timestamp_ = GetTimeInMillis();
+ repeater->OnTestProgramStart(*parent_);
+
+ // How many times to repeat the tests? We don't want to repeat them
+ // when we are inside the subprocess of a death test.
+ const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG_GET(repeat);
+
+ // Repeats forever if the repeat count is negative.
+ const bool gtest_repeat_forever = repeat < 0;
+
+ // Should test environments be set up and torn down for each repeat, or only
+ // set up on the first and torn down on the last iteration? If there is no
+ // "last" iteration because the tests will repeat forever, always recreate the
+ // environments to avoid leaks in case one of the environments is using
+ // resources that are external to this process. Without this check there would
+ // be no way to clean up those external resources automatically.
+ const bool recreate_environments_when_repeating =
+ GTEST_FLAG_GET(recreate_environments_when_repeating) ||
+ gtest_repeat_forever;
+
+ for (int i = 0; gtest_repeat_forever || i != repeat; i++) {
+ // We want to preserve failures generated by ad-hoc test
+ // assertions executed before RUN_ALL_TESTS().
+ ClearNonAdHocTestResult();
+
+ Timer timer;
+
+ // Shuffles test suites and tests if requested.
+ if (has_tests_to_run && GTEST_FLAG_GET(shuffle)) {
+ random()->Reseed(static_cast<uint32_t>(random_seed_));
+ // This should be done before calling OnTestIterationStart(),
+ // such that a test event listener can see the actual test order
+ // in the event.
+ ShuffleTests();
+ }
+
+ // Tells the unit test event listeners that the tests are about to start.
+ repeater->OnTestIterationStart(*parent_, i);
+
+ // Runs each test suite if there is at least one test to run.
+ if (has_tests_to_run) {
+ // Sets up all environments beforehand. If test environments aren't
+ // recreated for each iteration, only do so on the first iteration.
+ if (i == 0 || recreate_environments_when_repeating) {
+ repeater->OnEnvironmentsSetUpStart(*parent_);
+ ForEach(environments_, SetUpEnvironment);
+ repeater->OnEnvironmentsSetUpEnd(*parent_);
+ }
+
+ // Runs the tests only if there was no fatal failure or skip triggered
+ // during global set-up.
+ if (Test::IsSkipped()) {
+ // Emit diagnostics when global set-up calls skip, as it will not be
+ // emitted by default.
+ TestResult& test_result =
+ *internal::GetUnitTestImpl()->current_test_result();
+ for (int j = 0; j < test_result.total_part_count(); ++j) {
+ const TestPartResult& test_part_result =
+ test_result.GetTestPartResult(j);
+ if (test_part_result.type() == TestPartResult::kSkip) {
+ const std::string& result = test_part_result.message();
+ printf("%s\n", result.c_str());
+ }
+ }
+ fflush(stdout);
+ } else if (!Test::HasFatalFailure()) {
+ for (int test_index = 0; test_index < total_test_suite_count();
+ test_index++) {
+ GetMutableSuiteCase(test_index)->Run();
+ if (GTEST_FLAG_GET(fail_fast) &&
+ GetMutableSuiteCase(test_index)->Failed()) {
+ for (int j = test_index + 1; j < total_test_suite_count(); j++) {
+ GetMutableSuiteCase(j)->Skip();
+ }
+ break;
+ }
+ }
+ } else if (Test::HasFatalFailure()) {
+ // If there was a fatal failure during the global setup then we know we
+ // aren't going to run any tests. Explicitly mark all of the tests as
+ // skipped to make this obvious in the output.
+ for (int test_index = 0; test_index < total_test_suite_count();
+ test_index++) {
+ GetMutableSuiteCase(test_index)->Skip();
+ }
+ }
+
+ // Tears down all environments in reverse order afterwards. If test
+ // environments aren't recreated for each iteration, only do so on the
+ // last iteration.
+ if (i == repeat - 1 || recreate_environments_when_repeating) {
+ repeater->OnEnvironmentsTearDownStart(*parent_);
+ std::for_each(environments_.rbegin(), environments_.rend(),
+ TearDownEnvironment);
+ repeater->OnEnvironmentsTearDownEnd(*parent_);
+ }
+ }
+
+ elapsed_time_ = timer.Elapsed();
+
+ // Tells the unit test event listener that the tests have just finished.
+ repeater->OnTestIterationEnd(*parent_, i);
+
+ // Gets the result and clears it.
+ if (!Passed()) {
+ failed = true;
+ }
+
+ // Restores the original test order after the iteration. This
+ // allows the user to quickly repro a failure that happens in the
+ // N-th iteration without repeating the first (N - 1) iterations.
+ // This is not enclosed in "if (GTEST_FLAG(shuffle)) { ... }", in
+ // case the user somehow changes the value of the flag somewhere
+ // (it's always safe to unshuffle the tests).
+ UnshuffleTests();
+
+ if (GTEST_FLAG_GET(shuffle)) {
+ // Picks a new random seed for each iteration.
+ random_seed_ = GetNextRandomSeed(random_seed_);
+ }
+ }
+
+ repeater->OnTestProgramEnd(*parent_);
+
+ if (!gtest_is_initialized_before_run_all_tests) {
+ ColoredPrintf(
+ GTestColor::kRed,
+ "\nIMPORTANT NOTICE - DO NOT IGNORE:\n"
+ "This test program did NOT call " GTEST_INIT_GOOGLE_TEST_NAME_
+ "() before calling RUN_ALL_TESTS(). This is INVALID. Soon " GTEST_NAME_
+ " will start to enforce the valid usage. "
+ "Please fix it ASAP, or IT WILL START TO FAIL.\n"); // NOLINT
+#if GTEST_FOR_GOOGLE_
+ ColoredPrintf(GTestColor::kRed,
+ "For more details, see http://wiki/Main/ValidGUnitMain.\n");
+#endif // GTEST_FOR_GOOGLE_
+ }
+
+ return !failed;
+}
+
+// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
+// if the variable is present. If a file already exists at this location, this
+// function will write over it. If the variable is present, but the file cannot
+// be created, prints an error and exits.
+void WriteToShardStatusFileIfNeeded() {
+ const char* const test_shard_file = posix::GetEnv(kTestShardStatusFile);
+ if (test_shard_file != nullptr) {
+ FILE* const file = posix::FOpen(test_shard_file, "w");
+ if (file == nullptr) {
+ ColoredPrintf(GTestColor::kRed,
+ "Could not write to the test shard status file \"%s\" "
+ "specified by the %s environment variable.\n",
+ test_shard_file, kTestShardStatusFile);
+ fflush(stdout);
+ exit(EXIT_FAILURE);
+ }
+ fclose(file);
+ }
+}
+
+// Checks whether sharding is enabled by examining the relevant
+// environment variable values. If the variables are present,
+// but inconsistent (i.e., shard_index >= total_shards), prints
+// an error and exits. If in_subprocess_for_death_test, sharding is
+// disabled because it must only be applied to the original test
+// process. Otherwise, we could filter out death tests we intended to execute.
+bool ShouldShard(const char* total_shards_env, const char* shard_index_env,
+ bool in_subprocess_for_death_test) {
+ if (in_subprocess_for_death_test) {
+ return false;
+ }
+
+ const int32_t total_shards = Int32FromEnvOrDie(total_shards_env, -1);
+ const int32_t shard_index = Int32FromEnvOrDie(shard_index_env, -1);
+
+ if (total_shards == -1 && shard_index == -1) {
+ return false;
+ } else if (total_shards == -1 && shard_index != -1) {
+ const Message msg = Message() << "Invalid environment variables: you have "
+ << kTestShardIndex << " = " << shard_index
+ << ", but have left " << kTestTotalShards
+ << " unset.\n";
+ ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
+ fflush(stdout);
+ exit(EXIT_FAILURE);
+ } else if (total_shards != -1 && shard_index == -1) {
+ const Message msg = Message()
+ << "Invalid environment variables: you have "
+ << kTestTotalShards << " = " << total_shards
+ << ", but have left " << kTestShardIndex << " unset.\n";
+ ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
+ fflush(stdout);
+ exit(EXIT_FAILURE);
+ } else if (shard_index < 0 || shard_index >= total_shards) {
+ const Message msg =
+ Message() << "Invalid environment variables: we require 0 <= "
+ << kTestShardIndex << " < " << kTestTotalShards
+ << ", but you have " << kTestShardIndex << "=" << shard_index
+ << ", " << kTestTotalShards << "=" << total_shards << ".\n";
+ ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
+ fflush(stdout);
+ exit(EXIT_FAILURE);
+ }
+
+ return total_shards > 1;
+}
+
+// Parses the environment variable var as an Int32. If it is unset,
+// returns default_val. If it is not an Int32, prints an error
+// and aborts.
+int32_t Int32FromEnvOrDie(const char* var, int32_t default_val) {
+ const char* str_val = posix::GetEnv(var);
+ if (str_val == nullptr) {
+ return default_val;
+ }
+
+ int32_t result;
+ if (!ParseInt32(Message() << "The value of environment variable " << var,
+ str_val, &result)) {
+ exit(EXIT_FAILURE);
+ }
+ return result;
+}
+
+// Given the total number of shards, the shard index, and the test id,
+// returns true if and only if the test should be run on this shard. The test id
+// is some arbitrary but unique non-negative integer assigned to each test
+// method. Assumes that 0 <= shard_index < total_shards.
+bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
+ return (test_id % total_shards) == shard_index;
+}
+
+// Compares the name of each test with the user-specified filter to
+// decide whether the test should be run, then records the result in
+// each TestSuite and TestInfo object.
+// If shard_tests == true, further filters tests based on sharding
+// variables in the environment - see
+// https://github.com/google/googletest/blob/master/googletest/docs/advanced.md
+// . Returns the number of tests that should run.
+int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
+ const int32_t total_shards = shard_tests == HONOR_SHARDING_PROTOCOL
+ ? Int32FromEnvOrDie(kTestTotalShards, -1)
+ : -1;
+ const int32_t shard_index = shard_tests == HONOR_SHARDING_PROTOCOL
+ ? Int32FromEnvOrDie(kTestShardIndex, -1)
+ : -1;
+
+ const PositiveAndNegativeUnitTestFilter gtest_flag_filter(
+ GTEST_FLAG_GET(filter));
+ const UnitTestFilter disable_test_filter(kDisableTestFilter);
+ // num_runnable_tests are the number of tests that will
+ // run across all shards (i.e., match filter and are not disabled).
+ // num_selected_tests are the number of tests to be run on
+ // this shard.
+ int num_runnable_tests = 0;
+ int num_selected_tests = 0;
+ for (auto* test_suite : test_suites_) {
+ const std::string& test_suite_name = test_suite->name();
+ test_suite->set_should_run(false);
+
+ for (size_t j = 0; j < test_suite->test_info_list().size(); j++) {
+ TestInfo* const test_info = test_suite->test_info_list()[j];
+ const std::string test_name(test_info->name());
+ // A test is disabled if test suite name or test name matches
+ // kDisableTestFilter.
+ const bool is_disabled =
+ disable_test_filter.MatchesName(test_suite_name) ||
+ disable_test_filter.MatchesName(test_name);
+ test_info->is_disabled_ = is_disabled;
+
+ const bool matches_filter =
+ gtest_flag_filter.MatchesTest(test_suite_name, test_name);
+ test_info->matches_filter_ = matches_filter;
+
+ const bool is_runnable =
+ (GTEST_FLAG_GET(also_run_disabled_tests) || !is_disabled) &&
+ matches_filter;
+
+ const bool is_in_another_shard =
+ shard_tests != IGNORE_SHARDING_PROTOCOL &&
+ !ShouldRunTestOnShard(total_shards, shard_index, num_runnable_tests);
+ test_info->is_in_another_shard_ = is_in_another_shard;
+ const bool is_selected = is_runnable && !is_in_another_shard;
+
+ num_runnable_tests += is_runnable;
+ num_selected_tests += is_selected;
+
+ test_info->should_run_ = is_selected;
+ test_suite->set_should_run(test_suite->should_run() || is_selected);
+ }
+ }
+ return num_selected_tests;
+}
+
+// Prints the given C-string on a single line by replacing all '\n'
+// characters with string "\\n". If the output takes more than
+// max_length characters, only prints the first max_length characters
+// and "...".
+static void PrintOnOneLine(const char* str, int max_length) {
+ if (str != nullptr) {
+ for (int i = 0; *str != '\0'; ++str) {
+ if (i >= max_length) {
+ printf("...");
+ break;
+ }
+ if (*str == '\n') {
+ printf("\\n");
+ i += 2;
+ } else {
+ printf("%c", *str);
+ ++i;
+ }
+ }
+ }
+}
+
+// Prints the names of the tests matching the user-specified filter flag.
+void UnitTestImpl::ListTestsMatchingFilter() {
+ // Print at most this many characters for each type/value parameter.
+ const int kMaxParamLength = 250;
+
+ for (auto* test_suite : test_suites_) {
+ bool printed_test_suite_name = false;
+
+ for (size_t j = 0; j < test_suite->test_info_list().size(); j++) {
+ const TestInfo* const test_info = test_suite->test_info_list()[j];
+ if (test_info->matches_filter_) {
+ if (!printed_test_suite_name) {
+ printed_test_suite_name = true;
+ printf("%s.", test_suite->name());
+ if (test_suite->type_param() != nullptr) {
+ printf(" # %s = ", kTypeParamLabel);
+ // We print the type parameter on a single line to make
+ // the output easy to parse by a program.
+ PrintOnOneLine(test_suite->type_param(), kMaxParamLength);
+ }
+ printf("\n");
+ }
+ printf(" %s", test_info->name());
+ if (test_info->value_param() != nullptr) {
+ printf(" # %s = ", kValueParamLabel);
+ // We print the value parameter on a single line to make the
+ // output easy to parse by a program.
+ PrintOnOneLine(test_info->value_param(), kMaxParamLength);
+ }
+ printf("\n");
+ }
+ }
+ }
+ fflush(stdout);
+ const std::string& output_format = UnitTestOptions::GetOutputFormat();
+ if (output_format == "xml" || output_format == "json") {
+ FILE* fileout = OpenFileForWriting(
+ UnitTestOptions::GetAbsolutePathToOutputFile().c_str());
+ std::stringstream stream;
+ if (output_format == "xml") {
+ XmlUnitTestResultPrinter(
+ UnitTestOptions::GetAbsolutePathToOutputFile().c_str())
+ .PrintXmlTestsList(&stream, test_suites_);
+ } else if (output_format == "json") {
+ JsonUnitTestResultPrinter(
+ UnitTestOptions::GetAbsolutePathToOutputFile().c_str())
+ .PrintJsonTestList(&stream, test_suites_);
+ }
+ fprintf(fileout, "%s", StringStreamToString(&stream).c_str());
+ fclose(fileout);
+ }
+}
+
+// Sets the OS stack trace getter.
+//
+// Does nothing if the input and the current OS stack trace getter are
+// the same; otherwise, deletes the old getter and makes the input the
+// current getter.
+void UnitTestImpl::set_os_stack_trace_getter(
+ OsStackTraceGetterInterface* getter) {
+ if (os_stack_trace_getter_ != getter) {
+ delete os_stack_trace_getter_;
+ os_stack_trace_getter_ = getter;
+ }
+}
+
+// Returns the current OS stack trace getter if it is not NULL;
+// otherwise, creates an OsStackTraceGetter, makes it the current
+// getter, and returns it.
+OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() {
+ if (os_stack_trace_getter_ == nullptr) {
+#ifdef GTEST_OS_STACK_TRACE_GETTER_
+ os_stack_trace_getter_ = new GTEST_OS_STACK_TRACE_GETTER_;
+#else
+ os_stack_trace_getter_ = new OsStackTraceGetter;
+#endif // GTEST_OS_STACK_TRACE_GETTER_
+ }
+
+ return os_stack_trace_getter_;
+}
+
+// Returns the most specific TestResult currently running.
+TestResult* UnitTestImpl::current_test_result() {
+ if (current_test_info_ != nullptr) {
+ return &current_test_info_->result_;
+ }
+ if (current_test_suite_ != nullptr) {
+ return &current_test_suite_->ad_hoc_test_result_;
+ }
+ return &ad_hoc_test_result_;
+}
+
+// Shuffles all test suites, and the tests within each test suite,
+// making sure that death tests are still run first.
+void UnitTestImpl::ShuffleTests() {
+ // Shuffles the death test suites.
+ ShuffleRange(random(), 0, last_death_test_suite_ + 1, &test_suite_indices_);
+
+ // Shuffles the non-death test suites.
+ ShuffleRange(random(), last_death_test_suite_ + 1,
+ static_cast<int>(test_suites_.size()), &test_suite_indices_);
+
+ // Shuffles the tests inside each test suite.
+ for (auto& test_suite : test_suites_) {
+ test_suite->ShuffleTests(random());
+ }
+}
+
+// Restores the test suites and tests to their order before the first shuffle.
+void UnitTestImpl::UnshuffleTests() {
+ for (size_t i = 0; i < test_suites_.size(); i++) {
+ // Unshuffles the tests in each test suite.
+ test_suites_[i]->UnshuffleTests();
+ // Resets the index of each test suite.
+ test_suite_indices_[i] = static_cast<int>(i);
+ }
+}
+
+// Returns the current OS stack trace as an std::string.
+//
+// The maximum number of stack frames to be included is specified by
+// the gtest_stack_trace_depth flag. The skip_count parameter
+// specifies the number of top frames to be skipped, which doesn't
+// count against the number of frames to be included.
+//
+// For example, if Foo() calls Bar(), which in turn calls
+// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
+// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
+GTEST_NO_INLINE_ GTEST_NO_TAIL_CALL_ std::string
+GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/, int skip_count) {
+ // We pass skip_count + 1 to skip this wrapper function in addition
+ // to what the user really wants to skip.
+ return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(skip_count + 1);
+}
+
+// Used by the GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_ macro to
+// suppress unreachable code warnings.
+namespace {
+class ClassUniqueToAlwaysTrue {};
+} // namespace
+
+bool IsTrue(bool condition) { return condition; }
+
+bool AlwaysTrue() {
+#if GTEST_HAS_EXCEPTIONS
+ // This condition is always false so AlwaysTrue() never actually throws,
+ // but it makes the compiler think that it may throw.
+ if (IsTrue(false)) throw ClassUniqueToAlwaysTrue();
+#endif // GTEST_HAS_EXCEPTIONS
+ return true;
+}
+
+// If *pstr starts with the given prefix, modifies *pstr to be right
+// past the prefix and returns true; otherwise leaves *pstr unchanged
+// and returns false. None of pstr, *pstr, and prefix can be NULL.
+bool SkipPrefix(const char* prefix, const char** pstr) {
+ const size_t prefix_len = strlen(prefix);
+ if (strncmp(*pstr, prefix, prefix_len) == 0) {
+ *pstr += prefix_len;
+ return true;
+ }
+ return false;
+}
+
+// Parses a string as a command line flag. The string should have
+// the format "--flag=value". When def_optional is true, the "=value"
+// part can be omitted.
+//
+// Returns the value of the flag, or NULL if the parsing failed.
+static const char* ParseFlagValue(const char* str, const char* flag_name,
+ bool def_optional) {
+ // str and flag must not be NULL.
+ if (str == nullptr || flag_name == nullptr) return nullptr;
+
+ // The flag must start with "--" followed by GTEST_FLAG_PREFIX_.
+ const std::string flag_str =
+ std::string("--") + GTEST_FLAG_PREFIX_ + flag_name;
+ const size_t flag_len = flag_str.length();
+ if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr;
+
+ // Skips the flag name.
+ const char* flag_end = str + flag_len;
+
+ // When def_optional is true, it's OK to not have a "=value" part.
+ if (def_optional && (flag_end[0] == '\0')) {
+ return flag_end;
+ }
+
+ // If def_optional is true and there are more characters after the
+ // flag name, or if def_optional is false, there must be a '=' after
+ // the flag name.
+ if (flag_end[0] != '=') return nullptr;
+
+ // Returns the string after "=".
+ return flag_end + 1;
+}
+
+// Parses a string for a bool flag, in the form of either
+// "--flag=value" or "--flag".
+//
+// In the former case, the value is taken as true as long as it does
+// not start with '0', 'f', or 'F'.
+//
+// In the latter case, the value is taken as true.
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+static bool ParseFlag(const char* str, const char* flag_name, bool* value) {
+ // Gets the value of the flag as a string.
+ const char* const value_str = ParseFlagValue(str, flag_name, true);
+
+ // Aborts if the parsing failed.
+ if (value_str == nullptr) return false;
+
+ // Converts the string value to a bool.
+ *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F');
+ return true;
+}
+
+// Parses a string for an int32_t flag, in the form of "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+bool ParseFlag(const char* str, const char* flag_name, int32_t* value) {
+ // Gets the value of the flag as a string.
+ const char* const value_str = ParseFlagValue(str, flag_name, false);
+
+ // Aborts if the parsing failed.
+ if (value_str == nullptr) return false;
+
+ // Sets *value to the value of the flag.
+ return ParseInt32(Message() << "The value of flag --" << flag_name, value_str,
+ value);
+}
+
+// Parses a string for a string flag, in the form of "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+template <typename String>
+static bool ParseFlag(const char* str, const char* flag_name, String* value) {
+ // Gets the value of the flag as a string.
+ const char* const value_str = ParseFlagValue(str, flag_name, false);
+
+ // Aborts if the parsing failed.
+ if (value_str == nullptr) return false;
+
+ // Sets *value to the value of the flag.
+ *value = value_str;
+ return true;
+}
+
+// Determines whether a string has a prefix that Google Test uses for its
+// flags, i.e., starts with GTEST_FLAG_PREFIX_ or GTEST_FLAG_PREFIX_DASH_.
+// If Google Test detects that a command line flag has its prefix but is not
+// recognized, it will print its help message. Flags starting with
+// GTEST_INTERNAL_PREFIX_ followed by "internal_" are considered Google Test
+// internal flags and do not trigger the help message.
+static bool HasGoogleTestFlagPrefix(const char* str) {
+ return (SkipPrefix("--", &str) || SkipPrefix("-", &str) ||
+ SkipPrefix("/", &str)) &&
+ !SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str) &&
+ (SkipPrefix(GTEST_FLAG_PREFIX_, &str) ||
+ SkipPrefix(GTEST_FLAG_PREFIX_DASH_, &str));
+}
+
+// Prints a string containing code-encoded text. The following escape
+// sequences can be used in the string to control the text color:
+//
+// @@ prints a single '@' character.
+// @R changes the color to red.
+// @G changes the color to green.
+// @Y changes the color to yellow.
+// @D changes to the default terminal text color.
+//
+static void PrintColorEncoded(const char* str) {
+ GTestColor color = GTestColor::kDefault; // The current color.
+
+ // Conceptually, we split the string into segments divided by escape
+ // sequences. Then we print one segment at a time. At the end of
+ // each iteration, the str pointer advances to the beginning of the
+ // next segment.
+ for (;;) {
+ const char* p = strchr(str, '@');
+ if (p == nullptr) {
+ ColoredPrintf(color, "%s", str);
+ return;
+ }
+
+ ColoredPrintf(color, "%s", std::string(str, p).c_str());
+
+ const char ch = p[1];
+ str = p + 2;
+ if (ch == '@') {
+ ColoredPrintf(color, "@");
+ } else if (ch == 'D') {
+ color = GTestColor::kDefault;
+ } else if (ch == 'R') {
+ color = GTestColor::kRed;
+ } else if (ch == 'G') {
+ color = GTestColor::kGreen;
+ } else if (ch == 'Y') {
+ color = GTestColor::kYellow;
+ } else {
+ --str;
+ }
+ }
+}
+
+static const char kColorEncodedHelpMessage[] =
+ "This program contains tests written using " GTEST_NAME_
+ ". You can use the\n"
+ "following command line flags to control its behavior:\n"
+ "\n"
+ "Test Selection:\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "list_tests@D\n"
+ " List the names of all tests instead of running them. The name of\n"
+ " TEST(Foo, Bar) is \"Foo.Bar\".\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "filter=@YPOSITIVE_PATTERNS"
+ "[@G-@YNEGATIVE_PATTERNS]@D\n"
+ " Run only the tests whose name matches one of the positive patterns "
+ "but\n"
+ " none of the negative patterns. '?' matches any single character; "
+ "'*'\n"
+ " matches any substring; ':' separates two patterns.\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "also_run_disabled_tests@D\n"
+ " Run all disabled tests too.\n"
+ "\n"
+ "Test Execution:\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "repeat=@Y[COUNT]@D\n"
+ " Run the tests repeatedly; use a negative count to repeat forever.\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "shuffle@D\n"
+ " Randomize tests' orders on every iteration.\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "random_seed=@Y[NUMBER]@D\n"
+ " Random number seed to use for shuffling test orders (between 1 and\n"
+ " 99999, or 0 to use a seed based on the current time).\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "recreate_environments_when_repeating@D\n"
+ " Sets up and tears down the global test environment on each repeat\n"
+ " of the test.\n"
+ "\n"
+ "Test Output:\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n"
+ " Enable/disable colored output. The default is @Gauto@D.\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "brief=1@D\n"
+ " Only print test failures.\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "print_time=0@D\n"
+ " Don't print the elapsed time of each test.\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "output=@Y(@Gjson@Y|@Gxml@Y)[@G:@YDIRECTORY_PATH@G" GTEST_PATH_SEP_
+ "@Y|@G:@YFILE_PATH]@D\n"
+ " Generate a JSON or XML report in the given directory or with the "
+ "given\n"
+ " file name. @YFILE_PATH@D defaults to @Gtest_detail.xml@D.\n"
+#if GTEST_CAN_STREAM_RESULTS_
+ " @G--" GTEST_FLAG_PREFIX_
+ "stream_result_to=@YHOST@G:@YPORT@D\n"
+ " Stream test results to the given server.\n"
+#endif // GTEST_CAN_STREAM_RESULTS_
+ "\n"
+ "Assertion Behavior:\n"
+#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
+ " @G--" GTEST_FLAG_PREFIX_
+ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
+ " Set the default death test style.\n"
+#endif // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
+ " @G--" GTEST_FLAG_PREFIX_
+ "break_on_failure@D\n"
+ " Turn assertion failures into debugger break-points.\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "throw_on_failure@D\n"
+ " Turn assertion failures into C++ exceptions for use by an external\n"
+ " test framework.\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "catch_exceptions=0@D\n"
+ " Do not report exceptions as test failures. Instead, allow them\n"
+ " to crash the program or throw a pop-up (on Windows).\n"
+ "\n"
+ "Except for @G--" GTEST_FLAG_PREFIX_
+ "list_tests@D, you can alternatively set "
+ "the corresponding\n"
+ "environment variable of a flag (all letters in upper-case). For example, "
+ "to\n"
+ "disable colored text output, you can either specify "
+ "@G--" GTEST_FLAG_PREFIX_
+ "color=no@D or set\n"
+ "the @G" GTEST_FLAG_PREFIX_UPPER_
+ "COLOR@D environment variable to @Gno@D.\n"
+ "\n"
+ "For more information, please read the " GTEST_NAME_
+ " documentation at\n"
+ "@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_
+ "\n"
+ "(not one in your own code or tests), please report it to\n"
+ "@G<" GTEST_DEV_EMAIL_ ">@D.\n";
+
+static bool ParseGoogleTestFlag(const char* const arg) {
+#define GTEST_INTERNAL_PARSE_FLAG(flag_name) \
+ do { \
+ auto value = GTEST_FLAG_GET(flag_name); \
+ if (ParseFlag(arg, #flag_name, &value)) { \
+ GTEST_FLAG_SET(flag_name, value); \
+ return true; \
+ } \
+ } while (false)
+
+ GTEST_INTERNAL_PARSE_FLAG(also_run_disabled_tests);
+ GTEST_INTERNAL_PARSE_FLAG(break_on_failure);
+ GTEST_INTERNAL_PARSE_FLAG(catch_exceptions);
+ GTEST_INTERNAL_PARSE_FLAG(color);
+ GTEST_INTERNAL_PARSE_FLAG(death_test_style);
+ GTEST_INTERNAL_PARSE_FLAG(death_test_use_fork);
+ GTEST_INTERNAL_PARSE_FLAG(fail_fast);
+ GTEST_INTERNAL_PARSE_FLAG(filter);
+ GTEST_INTERNAL_PARSE_FLAG(internal_run_death_test);
+ GTEST_INTERNAL_PARSE_FLAG(list_tests);
+ GTEST_INTERNAL_PARSE_FLAG(output);
+ GTEST_INTERNAL_PARSE_FLAG(brief);
+ GTEST_INTERNAL_PARSE_FLAG(print_time);
+ GTEST_INTERNAL_PARSE_FLAG(print_utf8);
+ GTEST_INTERNAL_PARSE_FLAG(random_seed);
+ GTEST_INTERNAL_PARSE_FLAG(repeat);
+ GTEST_INTERNAL_PARSE_FLAG(recreate_environments_when_repeating);
+ GTEST_INTERNAL_PARSE_FLAG(shuffle);
+ GTEST_INTERNAL_PARSE_FLAG(stack_trace_depth);
+ GTEST_INTERNAL_PARSE_FLAG(stream_result_to);
+ GTEST_INTERNAL_PARSE_FLAG(throw_on_failure);
+ return false;
+}
+
+#if GTEST_USE_OWN_FLAGFILE_FLAG_
+static void LoadFlagsFromFile(const std::string& path) {
+ FILE* flagfile = posix::FOpen(path.c_str(), "r");
+ if (!flagfile) {
+ GTEST_LOG_(FATAL) << "Unable to open file \"" << GTEST_FLAG_GET(flagfile)
+ << "\"";
+ }
+ std::string contents(ReadEntireFile(flagfile));
+ posix::FClose(flagfile);
+ std::vector<std::string> lines;
+ SplitString(contents, '\n', &lines);
+ for (size_t i = 0; i < lines.size(); ++i) {
+ if (lines[i].empty()) continue;
+ if (!ParseGoogleTestFlag(lines[i].c_str())) g_help_flag = true;
+ }
+}
+#endif // GTEST_USE_OWN_FLAGFILE_FLAG_
+
+// Parses the command line for Google Test flags, without initializing
+// other parts of Google Test. The type parameter CharType can be
+// instantiated to either char or wchar_t.
+template <typename CharType>
+void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
+ std::string flagfile_value;
+ for (int i = 1; i < *argc; i++) {
+ const std::string arg_string = StreamableToString(argv[i]);
+ const char* const arg = arg_string.c_str();
+
+ using internal::ParseFlag;
+
+ bool remove_flag = false;
+ if (ParseGoogleTestFlag(arg)) {
+ remove_flag = true;
+#if GTEST_USE_OWN_FLAGFILE_FLAG_
+ } else if (ParseFlag(arg, "flagfile", &flagfile_value)) {
+ GTEST_FLAG_SET(flagfile, flagfile_value);
+ LoadFlagsFromFile(flagfile_value);
+ remove_flag = true;
+#endif // GTEST_USE_OWN_FLAGFILE_FLAG_
+ } else if (arg_string == "--help" || HasGoogleTestFlagPrefix(arg)) {
+ // Both help flag and unrecognized Google Test flags (excluding
+ // internal ones) trigger help display.
+ g_help_flag = true;
+ }
+
+ if (remove_flag) {
+ // Shift the remainder of the argv list left by one. Note
+ // that argv has (*argc + 1) elements, the last one always being
+ // NULL. The following loop moves the trailing NULL element as
+ // well.
+ for (int j = i; j != *argc; j++) {
+ argv[j] = argv[j + 1];
+ }
+
+ // Decrements the argument count.
+ (*argc)--;
+
+ // We also need to decrement the iterator as we just removed
+ // an element.
+ i--;
+ }
+ }
+
+ if (g_help_flag) {
+ // We print the help here instead of in RUN_ALL_TESTS(), as the
+ // latter may not be called at all if the user is using Google
+ // Test with another testing framework.
+ PrintColorEncoded(kColorEncodedHelpMessage);
+ }
+}
+
+// Parses the command line for Google Test flags, without initializing
+// other parts of Google Test.
+void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
+#if GTEST_HAS_ABSL
+ if (*argc > 0) {
+ // absl::ParseCommandLine() requires *argc > 0.
+ auto positional_args = absl::flags_internal::ParseCommandLineImpl(
+ *argc, argv, absl::flags_internal::ArgvListAction::kRemoveParsedArgs,
+ absl::flags_internal::UsageFlagsAction::kHandleUsage,
+ absl::flags_internal::OnUndefinedFlag::kReportUndefined);
+ // Any command-line positional arguments not part of any command-line flag
+ // (or arguments to a flag) are copied back out to argv, with the program
+ // invocation name at position 0, and argc is resized. This includes
+ // positional arguments after the flag-terminating delimiter '--'.
+ // See https://abseil.io/docs/cpp/guides/flags.
+ std::copy(positional_args.begin(), positional_args.end(), argv);
+ if (static_cast<int>(positional_args.size()) < *argc) {
+ argv[positional_args.size()] = nullptr;
+ *argc = static_cast<int>(positional_args.size());
+ }
+ }
+#else
+ ParseGoogleTestFlagsOnlyImpl(argc, argv);
+#endif
+
+ // Fix the value of *_NSGetArgc() on macOS, but if and only if
+ // *_NSGetArgv() == argv
+ // Only applicable to char** version of argv
+#if GTEST_OS_MAC
+#ifndef GTEST_OS_IOS
+ if (*_NSGetArgv() == argv) {
+ *_NSGetArgc() = *argc;
+ }
+#endif
+#endif
+}
+void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
+ ParseGoogleTestFlagsOnlyImpl(argc, argv);
+}
+
+// The internal implementation of InitGoogleTest().
+//
+// The type parameter CharType can be instantiated to either char or
+// wchar_t.
+template <typename CharType>
+void InitGoogleTestImpl(int* argc, CharType** argv) {
+ // We don't want to run the initialization code twice.
+ if (GTestIsInitialized()) return;
+
+ if (*argc <= 0) return;
+
+ g_argvs.clear();
+ for (int i = 0; i != *argc; i++) {
+ g_argvs.push_back(StreamableToString(argv[i]));
+ }
+
+#if GTEST_HAS_ABSL
+ absl::InitializeSymbolizer(g_argvs[0].c_str());
+
+ // When using the Abseil Flags library, set the program usage message to the
+ // help message, but remove the color-encoding from the message first.
+ absl::SetProgramUsageMessage(absl::StrReplaceAll(
+ kColorEncodedHelpMessage,
+ {{"@D", ""}, {"@R", ""}, {"@G", ""}, {"@Y", ""}, {"@@", "@"}}));
+#endif // GTEST_HAS_ABSL
+
+ ParseGoogleTestFlagsOnly(argc, argv);
+ GetUnitTestImpl()->PostFlagParsingInit();
+}
+
+} // namespace internal
+
+// Initializes Google Test. This must be called before calling
+// RUN_ALL_TESTS(). In particular, it parses a command line for the
+// flags that Google Test recognizes. Whenever a Google Test flag is
+// seen, it is removed from argv, and *argc is decremented.
+//
+// No value is returned. Instead, the Google Test flag variables are
+// updated.
+//
+// Calling the function for the second time has no user-visible effect.
+void InitGoogleTest(int* argc, char** argv) {
+#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+ GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv);
+#else // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+ internal::InitGoogleTestImpl(argc, argv);
+#endif // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+}
+
+// This overloaded version can be used in Windows programs compiled in
+// UNICODE mode.
+void InitGoogleTest(int* argc, wchar_t** argv) {
+#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+ GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv);
+#else // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+ internal::InitGoogleTestImpl(argc, argv);
+#endif // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+}
+
+// This overloaded version can be used on Arduino/embedded platforms where
+// there is no argc/argv.
+void InitGoogleTest() {
+ // Since Arduino doesn't have a command line, fake out the argc/argv arguments
+ int argc = 1;
+ const auto arg0 = "dummy";
+ char* argv0 = const_cast<char*>(arg0);
+ char** argv = &argv0;
+
+#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+ GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(&argc, argv);
+#else // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+ internal::InitGoogleTestImpl(&argc, argv);
+#endif // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+}
+
+#if !defined(GTEST_CUSTOM_TEMPDIR_FUNCTION_)
+// Return value of first environment variable that is set and contains
+// a non-empty string. If there are none, return the "fallback" string.
+// Since we like the temporary directory to have a directory separator suffix,
+// add it if not provided in the environment variable value.
+static std::string GetTempDirFromEnv(
+ std::initializer_list<const char*> environment_variables,
+ const char* fallback, char separator) {
+ for (const char* variable_name : environment_variables) {
+ const char* value = internal::posix::GetEnv(variable_name);
+ if (value != nullptr && value[0] != '\0') {
+ if (value[strlen(value) - 1] != separator) {
+ return std::string(value).append(1, separator);
+ }
+ return value;
+ }
+ }
+ return fallback;
+}
+#endif
+
+std::string TempDir() {
+#if defined(GTEST_CUSTOM_TEMPDIR_FUNCTION_)
+ return GTEST_CUSTOM_TEMPDIR_FUNCTION_();
+#elif GTEST_OS_WINDOWS || GTEST_OS_WINDOWS_MOBILE
+ return GetTempDirFromEnv({"TEST_TMPDIR", "TEMP"}, "\\temp\\", '\\');
+#elif GTEST_OS_LINUX_ANDROID
+ return GetTempDirFromEnv({"TEST_TMPDIR", "TMPDIR"}, "/data/local/tmp/", '/');
+#else
+ return GetTempDirFromEnv({"TEST_TMPDIR", "TMPDIR"}, "/tmp/", '/');
+#endif
+}
+
+// Class ScopedTrace
+
+// Pushes the given source file location and message onto a per-thread
+// trace stack maintained by Google Test.
+void ScopedTrace::PushTrace(const char* file, int line, std::string message) {
+ internal::TraceInfo trace;
+ trace.file = file;
+ trace.line = line;
+ trace.message.swap(message);
+
+ UnitTest::GetInstance()->PushGTestTrace(trace);
+}
+
+// Pops the info pushed by the c'tor.
+ScopedTrace::~ScopedTrace() GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
+ UnitTest::GetInstance()->PopGTestTrace();
+}
+
+} // namespace testing
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest_main.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest_main.cc
new file mode 100644
index 0000000000..44976375c9
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest_main.cc
@@ -0,0 +1,53 @@
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <cstdio>
+
+#include "gtest/gtest.h"
+
+#if GTEST_OS_ESP8266 || GTEST_OS_ESP32
+#if GTEST_OS_ESP8266
+extern "C" {
+#endif
+void setup() { testing::InitGoogleTest(); }
+
+void loop() { RUN_ALL_TESTS(); }
+
+#if GTEST_OS_ESP8266
+}
+#endif
+
+#else
+
+GTEST_API_ int main(int argc, char **argv) {
+ printf("Running main() from %s\n", __FILE__);
+ testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/LICENSE b/media/libvpx/libvpx/third_party/libyuv/LICENSE
new file mode 100644
index 0000000000..c911747a6b
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/LICENSE
@@ -0,0 +1,29 @@
+Copyright 2011 The LibYuv Project Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+ * Neither the name of Google nor the names of its contributors may
+ be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/media/libvpx/libvpx/third_party/libyuv/README.libvpx b/media/libvpx/libvpx/third_party/libyuv/README.libvpx
new file mode 100644
index 0000000000..9519dc4bee
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/README.libvpx
@@ -0,0 +1,23 @@
+Name: libyuv
+URL: https://chromium.googlesource.com/libyuv/libyuv
+Version: a37e7bfece9e0676ae90a1700b0ec85b0f4f22a1
+License: BSD
+License File: LICENSE
+
+Description:
+libyuv is an open source project that includes YUV conversion and scaling
+functionality.
+
+The optimized scaler in libyuv is used in the multiple resolution encoder
+example which down-samples the original input video (f.g. 1280x720) a number of
+times in order to encode multiple resolution bit streams.
+
+Local Modifications:
+Disable ARGBToRGB24Row_AVX512VBMI due to build failure on Mac.
+rm libyuv/include/libyuv.h libyuv/include/libyuv/compare_row.h
+mv libyuv/include tmp/
+mv libyuv/source tmp/
+mv libyuv/LICENSE tmp/
+rm -rf libyuv
+
+mv tmp/* third_party/libyuv/
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/basic_types.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/basic_types.h
new file mode 100644
index 0000000000..01d9dfc773
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/basic_types.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_
+#define INCLUDE_LIBYUV_BASIC_TYPES_H_
+
+#include <stddef.h> // For size_t and NULL
+
+#if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG)
+#define INT_TYPES_DEFINED
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+#include <sys/types.h> // for uintptr_t on x86
+typedef unsigned __int64 uint64_t;
+typedef __int64 int64_t;
+typedef unsigned int uint32_t;
+typedef int int32_t;
+typedef unsigned short uint16_t;
+typedef short int16_t;
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+#else
+#include <stdint.h> // for uintptr_t and C99 types
+#endif // defined(_MSC_VER) && (_MSC_VER < 1600)
+typedef uint64_t uint64;
+typedef int64_t int64;
+typedef uint32_t uint32;
+typedef int32_t int32;
+typedef uint16_t uint16;
+typedef int16_t int16;
+typedef uint8_t uint8;
+typedef int8_t int8;
+#endif // INT_TYPES_DEFINED
+
+#if !defined(LIBYUV_API)
+#if defined(_WIN32) || defined(__CYGWIN__)
+#if defined(LIBYUV_BUILDING_SHARED_LIBRARY)
+#define LIBYUV_API __declspec(dllexport)
+#elif defined(LIBYUV_USING_SHARED_LIBRARY)
+#define LIBYUV_API __declspec(dllimport)
+#else
+#define LIBYUV_API
+#endif // LIBYUV_BUILDING_SHARED_LIBRARY
+#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
+ (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
+ defined(LIBYUV_USING_SHARED_LIBRARY))
+#define LIBYUV_API __attribute__((visibility("default")))
+#else
+#define LIBYUV_API
+#endif // __GNUC__
+#endif // LIBYUV_API
+
+// TODO(fbarchard): Remove bool macros.
+#define LIBYUV_BOOL int
+#define LIBYUV_FALSE 0
+#define LIBYUV_TRUE 1
+
+#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/compare.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/compare.h
new file mode 100644
index 0000000000..3353ad71c6
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/compare.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_COMPARE_H_
+#define INCLUDE_LIBYUV_COMPARE_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Compute a hash for specified memory. Seed of 5381 recommended.
+LIBYUV_API
+uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed);
+
+// Hamming Distance
+LIBYUV_API
+uint64_t ComputeHammingDistance(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+
+// Scan an opaque argb image and return fourcc based on alpha offset.
+// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
+LIBYUV_API
+uint32_t ARGBDetect(const uint8_t* argb,
+ int stride_argb,
+ int width,
+ int height);
+
+// Sum Square Error - used to compute Mean Square Error or PSNR.
+LIBYUV_API
+uint64_t ComputeSumSquareError(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count);
+
+LIBYUV_API
+uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
+ int stride_a,
+ const uint8_t* src_b,
+ int stride_b,
+ int width,
+ int height);
+
+static const int kMaxPsnr = 128;
+
+LIBYUV_API
+double SumSquareErrorToPsnr(uint64_t sse, uint64_t count);
+
+LIBYUV_API
+double CalcFramePsnr(const uint8_t* src_a,
+ int stride_a,
+ const uint8_t* src_b,
+ int stride_b,
+ int width,
+ int height);
+
+LIBYUV_API
+double I420Psnr(const uint8_t* src_y_a,
+ int stride_y_a,
+ const uint8_t* src_u_a,
+ int stride_u_a,
+ const uint8_t* src_v_a,
+ int stride_v_a,
+ const uint8_t* src_y_b,
+ int stride_y_b,
+ const uint8_t* src_u_b,
+ int stride_u_b,
+ const uint8_t* src_v_b,
+ int stride_v_b,
+ int width,
+ int height);
+
+LIBYUV_API
+double CalcFrameSsim(const uint8_t* src_a,
+ int stride_a,
+ const uint8_t* src_b,
+ int stride_b,
+ int width,
+ int height);
+
+LIBYUV_API
+double I420Ssim(const uint8_t* src_y_a,
+ int stride_y_a,
+ const uint8_t* src_u_a,
+ int stride_u_a,
+ const uint8_t* src_v_a,
+ int stride_v_a,
+ const uint8_t* src_y_b,
+ int stride_y_b,
+ const uint8_t* src_u_b,
+ int stride_u_b,
+ const uint8_t* src_v_b,
+ int stride_v_b,
+ int width,
+ int height);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_COMPARE_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert.h
new file mode 100644
index 0000000000..d12ef24f79
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert.h
@@ -0,0 +1,406 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_CONVERT_H_
+#define INCLUDE_LIBYUV_CONVERT_H_
+
+#include "libyuv/basic_types.h"
+
+#include "libyuv/rotate.h" // For enum RotationMode.
+
+// TODO(fbarchard): fix WebRTC source to include following libyuv headers:
+#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620
+#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620
+#include "libyuv/planar_functions.h" // For WebRTC I420Rect, CopyPlane. b/618
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Convert I444 to I420.
+LIBYUV_API
+int I444ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert I422 to I420.
+LIBYUV_API
+int I422ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Copy I420 to I420.
+#define I420ToI420 I420Copy
+LIBYUV_API
+int I420Copy(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Copy I010 to I010
+#define I010ToI010 I010Copy
+#define H010ToH010 I010Copy
+LIBYUV_API
+int I010Copy(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert 10 bit YUV to 8 bit
+#define H010ToH420 I010ToI420
+LIBYUV_API
+int I010ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert I400 (grey) to I420.
+LIBYUV_API
+int I400ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+#define J400ToJ420 I400ToI420
+
+// Convert NV12 to I420.
+LIBYUV_API
+int NV12ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert NV21 to I420.
+LIBYUV_API
+int NV21ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert YUY2 to I420.
+LIBYUV_API
+int YUY2ToI420(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert UYVY to I420.
+LIBYUV_API
+int UYVYToI420(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert M420 to I420.
+LIBYUV_API
+int M420ToI420(const uint8_t* src_m420,
+ int src_stride_m420,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert Android420 to I420.
+LIBYUV_API
+int Android420ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ int src_pixel_stride_uv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// ARGB little endian (bgra in memory) to I420.
+LIBYUV_API
+int ARGBToI420(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// BGRA little endian (argb in memory) to I420.
+LIBYUV_API
+int BGRAToI420(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// ABGR little endian (rgba in memory) to I420.
+LIBYUV_API
+int ABGRToI420(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// RGBA little endian (abgr in memory) to I420.
+LIBYUV_API
+int RGBAToI420(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// RGB little endian (bgr in memory) to I420.
+LIBYUV_API
+int RGB24ToI420(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// RGB big endian (rgb in memory) to I420.
+LIBYUV_API
+int RAWToI420(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// RGB16 (RGBP fourcc) little endian to I420.
+LIBYUV_API
+int RGB565ToI420(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// RGB15 (RGBO fourcc) little endian to I420.
+LIBYUV_API
+int ARGB1555ToI420(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// RGB12 (R444 fourcc) little endian to I420.
+LIBYUV_API
+int ARGB4444ToI420(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+#ifdef HAVE_JPEG
+// src_width/height provided by capture.
+// dst_width/height for clipping determine final size.
+LIBYUV_API
+int MJPGToI420(const uint8_t* sample,
+ size_t sample_size,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height);
+
+// Query size of MJPG in pixels.
+LIBYUV_API
+int MJPGSize(const uint8_t* sample,
+ size_t sample_size,
+ int* width,
+ int* height);
+#endif
+
+// Convert camera sample to I420 with cropping, rotation and vertical flip.
+// "src_size" is needed to parse MJPG.
+// "dst_stride_y" number of bytes in a row of the dst_y plane.
+// Normally this would be the same as dst_width, with recommended alignment
+// to 16 bytes for better efficiency.
+// If rotation of 90 or 270 is used, stride is affected. The caller should
+// allocate the I420 buffer according to rotation.
+// "dst_stride_u" number of bytes in a row of the dst_u plane.
+// Normally this would be the same as (dst_width + 1) / 2, with
+// recommended alignment to 16 bytes for better efficiency.
+// If rotation of 90 or 270 is used, stride is affected.
+// "crop_x" and "crop_y" are starting position for cropping.
+// To center, crop_x = (src_width - dst_width) / 2
+// crop_y = (src_height - dst_height) / 2
+// "src_width" / "src_height" is size of src_frame in pixels.
+// "src_height" can be negative indicating a vertically flipped image source.
+// "crop_width" / "crop_height" is the size to crop the src to.
+// Must be less than or equal to src_width/src_height
+// Cropping parameters are pre-rotation.
+// "rotation" can be 0, 90, 180 or 270.
+// "fourcc" is a fourcc. ie 'I420', 'YUY2'
+// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
+LIBYUV_API
+int ConvertToI420(const uint8_t* sample,
+ size_t sample_size,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int crop_x,
+ int crop_y,
+ int src_width,
+ int src_height,
+ int crop_width,
+ int crop_height,
+ enum RotationMode rotation,
+ uint32_t fourcc);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_CONVERT_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_argb.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_argb.h
new file mode 100644
index 0000000000..ab772b6c32
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_argb.h
@@ -0,0 +1,687 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_
+#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
+
+#include "libyuv/basic_types.h"
+
+#include "libyuv/rotate.h" // For enum RotationMode.
+
+// TODO(fbarchard): This set of functions should exactly match convert.h
+// TODO(fbarchard): Add tests. Create random content of right size and convert
+// with C vs Opt and or to I420 and compare.
+// TODO(fbarchard): Some of these functions lack parameter setting.
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Alias.
+#define ARGBToARGB ARGBCopy
+
+// Copy ARGB to ARGB.
+LIBYUV_API
+int ARGBCopy(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert I420 to ARGB.
+LIBYUV_API
+int I420ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Duplicate prototype for function in convert_from.h for remoting.
+LIBYUV_API
+int I420ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert I010 to ARGB.
+LIBYUV_API
+int I010ToARGB(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert I010 to ARGB.
+LIBYUV_API
+int I010ToARGB(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert I010 to ABGR.
+LIBYUV_API
+int I010ToABGR(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert H010 to ARGB.
+LIBYUV_API
+int H010ToARGB(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert H010 to ABGR.
+LIBYUV_API
+int H010ToABGR(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert I422 to ARGB.
+LIBYUV_API
+int I422ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert I444 to ARGB.
+LIBYUV_API
+int I444ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert J444 to ARGB.
+LIBYUV_API
+int J444ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert I444 to ABGR.
+LIBYUV_API
+int I444ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert I420 with Alpha to preattenuated ARGB.
+LIBYUV_API
+int I420AlphaToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ const uint8_t* src_a,
+ int src_stride_a,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height,
+ int attenuate);
+
+// Convert I420 with Alpha to preattenuated ABGR.
+LIBYUV_API
+int I420AlphaToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ const uint8_t* src_a,
+ int src_stride_a,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height,
+ int attenuate);
+
+// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
+LIBYUV_API
+int I400ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert J400 (jpeg grey) to ARGB.
+LIBYUV_API
+int J400ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Alias.
+#define YToARGB I400ToARGB
+
+// Convert NV12 to ARGB.
+LIBYUV_API
+int NV12ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert NV21 to ARGB.
+LIBYUV_API
+int NV21ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert NV12 to ABGR.
+int NV12ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert NV21 to ABGR.
+LIBYUV_API
+int NV21ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert NV12 to RGB24.
+LIBYUV_API
+int NV12ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height);
+
+// Convert NV21 to RGB24.
+LIBYUV_API
+int NV21ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height);
+
+// Convert M420 to ARGB.
+LIBYUV_API
+int M420ToARGB(const uint8_t* src_m420,
+ int src_stride_m420,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert YUY2 to ARGB.
+LIBYUV_API
+int YUY2ToARGB(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert UYVY to ARGB.
+LIBYUV_API
+int UYVYToARGB(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert J420 to ARGB.
+LIBYUV_API
+int J420ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert J422 to ARGB.
+LIBYUV_API
+int J422ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert J420 to ABGR.
+LIBYUV_API
+int J420ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert J422 to ABGR.
+LIBYUV_API
+int J422ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert H420 to ARGB.
+LIBYUV_API
+int H420ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert H422 to ARGB.
+LIBYUV_API
+int H422ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert H420 to ABGR.
+LIBYUV_API
+int H420ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert H422 to ABGR.
+LIBYUV_API
+int H422ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert H010 to ARGB.
+LIBYUV_API
+int H010ToARGB(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert I010 to AR30.
+LIBYUV_API
+int I010ToAR30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height);
+
+// Convert H010 to AR30.
+LIBYUV_API
+int H010ToAR30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height);
+
+// Convert I010 to AB30.
+LIBYUV_API
+int I010ToAB30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ab30,
+ int dst_stride_ab30,
+ int width,
+ int height);
+
+// Convert H010 to AB30.
+LIBYUV_API
+int H010ToAB30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ab30,
+ int dst_stride_ab30,
+ int width,
+ int height);
+
+// BGRA little endian (argb in memory) to ARGB.
+LIBYUV_API
+int BGRAToARGB(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// ABGR little endian (rgba in memory) to ARGB.
+LIBYUV_API
+int ABGRToARGB(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// RGBA little endian (abgr in memory) to ARGB.
+LIBYUV_API
+int RGBAToARGB(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Deprecated function name.
+#define BG24ToARGB RGB24ToARGB
+
+// RGB little endian (bgr in memory) to ARGB.
+LIBYUV_API
+int RGB24ToARGB(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// RGB big endian (rgb in memory) to ARGB.
+LIBYUV_API
+int RAWToARGB(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// RGB16 (RGBP fourcc) little endian to ARGB.
+LIBYUV_API
+int RGB565ToARGB(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// RGB15 (RGBO fourcc) little endian to ARGB.
+LIBYUV_API
+int ARGB1555ToARGB(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// RGB12 (R444 fourcc) little endian to ARGB.
+LIBYUV_API
+int ARGB4444ToARGB(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Aliases
+#define AB30ToARGB AR30ToABGR
+#define AB30ToABGR AR30ToARGB
+#define AB30ToAR30 AR30ToAB30
+
+// Convert AR30 To ARGB.
+LIBYUV_API
+int AR30ToARGB(const uint8_t* src_ar30,
+ int src_stride_ar30,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert AR30 To ABGR.
+LIBYUV_API
+int AR30ToABGR(const uint8_t* src_ar30,
+ int src_stride_ar30,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert AR30 To AB30.
+LIBYUV_API
+int AR30ToAB30(const uint8_t* src_ar30,
+ int src_stride_ar30,
+ uint8_t* dst_ab30,
+ int dst_stride_ab30,
+ int width,
+ int height);
+
+#ifdef HAVE_JPEG
+// src_width/height provided by capture
+// dst_width/height for clipping determine final size.
+LIBYUV_API
+int MJPGToARGB(const uint8_t* sample,
+ size_t sample_size,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height);
+#endif
+
+// Convert Android420 to ARGB.
+LIBYUV_API
+int Android420ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ int src_pixel_stride_uv,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert Android420 to ABGR.
+LIBYUV_API
+int Android420ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ int src_pixel_stride_uv,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert camera sample to ARGB with cropping, rotation and vertical flip.
+// "sample_size" is needed to parse MJPG.
+// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
+// Normally this would be the same as dst_width, with recommended alignment
+// to 16 bytes for better efficiency.
+// If rotation of 90 or 270 is used, stride is affected. The caller should
+// allocate the I420 buffer according to rotation.
+// "dst_stride_u" number of bytes in a row of the dst_u plane.
+// Normally this would be the same as (dst_width + 1) / 2, with
+// recommended alignment to 16 bytes for better efficiency.
+// If rotation of 90 or 270 is used, stride is affected.
+// "crop_x" and "crop_y" are starting position for cropping.
+// To center, crop_x = (src_width - dst_width) / 2
+// crop_y = (src_height - dst_height) / 2
+// "src_width" / "src_height" is size of src_frame in pixels.
+// "src_height" can be negative indicating a vertically flipped image source.
+// "crop_width" / "crop_height" is the size to crop the src to.
+// Must be less than or equal to src_width/src_height
+// Cropping parameters are pre-rotation.
+// "rotation" can be 0, 90, 180 or 270.
+// "fourcc" is a fourcc. ie 'I420', 'YUY2'
+// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
+LIBYUV_API
+int ConvertToARGB(const uint8_t* sample,
+ size_t sample_size,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int crop_x,
+ int crop_y,
+ int src_width,
+ int src_height,
+ int crop_width,
+ int crop_height,
+ enum RotationMode rotation,
+ uint32_t fourcc);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from.h
new file mode 100644
index 0000000000..5cd8a4bfc0
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from.h
@@ -0,0 +1,342 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_
+#define INCLUDE_LIBYUV_CONVERT_FROM_H_
+
+#include "libyuv/basic_types.h"
+#include "libyuv/rotate.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// See Also convert.h for conversions from formats to I420.
+
+// Convert 8 bit YUV to 10 bit.
+#define H420ToH010 I420ToI010
+int I420ToI010(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToI422(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToI444(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
+LIBYUV_API
+int I400Copy(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToNV12(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToNV21(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_vu,
+ int dst_stride_vu,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToUYVY(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_uyvy,
+ int dst_stride_uyvy,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToBGRA(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_bgra,
+ int dst_stride_bgra,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToRGBA(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgba,
+ int dst_stride_rgba,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height);
+
+LIBYUV_API
+int H420ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height);
+
+LIBYUV_API
+int H420ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToRGB565(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ int width,
+ int height);
+
+LIBYUV_API
+int I422ToRGB565(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ int width,
+ int height);
+
+// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
+// Values in dither matrix from 0 to 7 recommended.
+// The order of the dither matrix is first byte is upper left.
+
+LIBYUV_API
+int I420ToRGB565Dither(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ const uint8_t* dither4x4,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToARGB1555(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb1555,
+ int dst_stride_argb1555,
+ int width,
+ int height);
+
+LIBYUV_API
+int I420ToARGB4444(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb4444,
+ int dst_stride_argb4444,
+ int width,
+ int height);
+
+// Convert I420 to AR30.
+LIBYUV_API
+int I420ToAR30(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height);
+
+// Convert H420 to AR30.
+LIBYUV_API
+int H420ToAR30(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height);
+
+// Convert I420 to specified format.
+// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
+// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
+LIBYUV_API
+int ConvertFromI420(const uint8_t* y,
+ int y_stride,
+ const uint8_t* u,
+ int u_stride,
+ const uint8_t* v,
+ int v_stride,
+ uint8_t* dst_sample,
+ int dst_sample_stride,
+ int width,
+ int height,
+ uint32_t fourcc);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from_argb.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from_argb.h
new file mode 100644
index 0000000000..05c815a093
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from_argb.h
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
+#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Copy ARGB to ARGB.
+#define ARGBToARGB ARGBCopy
+LIBYUV_API
+int ARGBCopy(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert ARGB To BGRA.
+LIBYUV_API
+int ARGBToBGRA(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_bgra,
+ int dst_stride_bgra,
+ int width,
+ int height);
+
+// Convert ARGB To ABGR.
+LIBYUV_API
+int ARGBToABGR(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert ARGB To RGBA.
+LIBYUV_API
+int ARGBToRGBA(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_rgba,
+ int dst_stride_rgba,
+ int width,
+ int height);
+
+// Aliases
+#define ARGBToAB30 ABGRToAR30
+#define ABGRToAB30 ARGBToAR30
+
+// Convert ABGR To AR30.
+LIBYUV_API
+int ABGRToAR30(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height);
+
+// Convert ARGB To AR30.
+LIBYUV_API
+int ARGBToAR30(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height);
+
+// Convert ARGB To RGB24.
+LIBYUV_API
+int ARGBToRGB24(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height);
+
+// Convert ARGB To RAW.
+LIBYUV_API
+int ARGBToRAW(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height);
+
+// Convert ARGB To RGB565.
+LIBYUV_API
+int ARGBToRGB565(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ int width,
+ int height);
+
+// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
+// Values in dither matrix from 0 to 7 recommended.
+// The order of the dither matrix is first byte is upper left.
+// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
+// const uint8_t(*dither)[4][4];
+LIBYUV_API
+int ARGBToRGB565Dither(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ const uint8_t* dither4x4,
+ int width,
+ int height);
+
+// Convert ARGB To ARGB1555.
+LIBYUV_API
+int ARGBToARGB1555(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb1555,
+ int dst_stride_argb1555,
+ int width,
+ int height);
+
+// Convert ARGB To ARGB4444.
+LIBYUV_API
+int ARGBToARGB4444(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb4444,
+ int dst_stride_argb4444,
+ int width,
+ int height);
+
+// Convert ARGB To I444.
+LIBYUV_API
+int ARGBToI444(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert ARGB To I422.
+LIBYUV_API
+int ARGBToI422(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert ARGB To I420. (also in convert.h)
+LIBYUV_API
+int ARGBToI420(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert ARGB to J420. (JPeg full range I420).
+LIBYUV_API
+int ARGBToJ420(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert ARGB to J422.
+LIBYUV_API
+int ARGBToJ422(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert ARGB to J400. (JPeg full range).
+LIBYUV_API
+int ARGBToJ400(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ int width,
+ int height);
+
+// Convert ARGB to I400.
+LIBYUV_API
+int ARGBToI400(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height);
+
+// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB)
+LIBYUV_API
+int ARGBToG(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ int width,
+ int height);
+
+// Convert ARGB To NV12.
+LIBYUV_API
+int ARGBToNV12(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
+// Convert ARGB To NV21.
+LIBYUV_API
+int ARGBToNV21(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_vu,
+ int dst_stride_vu,
+ int width,
+ int height);
+
+// Convert ARGB To NV21.
+LIBYUV_API
+int ARGBToNV21(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_vu,
+ int dst_stride_vu,
+ int width,
+ int height);
+
+// Convert ARGB To YUY2.
+LIBYUV_API
+int ARGBToYUY2(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height);
+
+// Convert ARGB To UYVY.
+LIBYUV_API
+int ARGBToUYVY(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_uyvy,
+ int dst_stride_uyvy,
+ int width,
+ int height);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/cpu_id.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/cpu_id.h
new file mode 100644
index 0000000000..0229cb5e73
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/cpu_id.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_CPU_ID_H_
+#define INCLUDE_LIBYUV_CPU_ID_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Internal flag to indicate cpuid requires initialization.
+static const int kCpuInitialized = 0x1;
+
+// These flags are only valid on ARM processors.
+static const int kCpuHasARM = 0x2;
+static const int kCpuHasNEON = 0x4;
+// 0x8 reserved for future ARM flag.
+
+// These flags are only valid on x86 processors.
+static const int kCpuHasX86 = 0x10;
+static const int kCpuHasSSE2 = 0x20;
+static const int kCpuHasSSSE3 = 0x40;
+static const int kCpuHasSSE41 = 0x80;
+static const int kCpuHasSSE42 = 0x100; // unused at this time.
+static const int kCpuHasAVX = 0x200;
+static const int kCpuHasAVX2 = 0x400;
+static const int kCpuHasERMS = 0x800;
+static const int kCpuHasFMA3 = 0x1000;
+static const int kCpuHasF16C = 0x2000;
+static const int kCpuHasGFNI = 0x4000;
+static const int kCpuHasAVX512BW = 0x8000;
+static const int kCpuHasAVX512VL = 0x10000;
+static const int kCpuHasAVX512VBMI = 0x20000;
+static const int kCpuHasAVX512VBMI2 = 0x40000;
+static const int kCpuHasAVX512VBITALG = 0x80000;
+static const int kCpuHasAVX512VPOPCNTDQ = 0x100000;
+
+// These flags are only valid on MIPS processors.
+static const int kCpuHasMIPS = 0x200000;
+static const int kCpuHasMSA = 0x400000;
+
+// Optional init function. TestCpuFlag does an auto-init.
+// Returns cpu_info flags.
+LIBYUV_API
+int InitCpuFlags(void);
+
+// Detect CPU has SSE2 etc.
+// Test_flag parameter should be one of kCpuHas constants above.
+// Returns non-zero if instruction set is detected
+static __inline int TestCpuFlag(int test_flag) {
+ LIBYUV_API extern int cpu_info_;
+#ifdef __ATOMIC_RELAXED
+ int cpu_info = __atomic_load_n(&cpu_info_, __ATOMIC_RELAXED);
+#else
+ int cpu_info = cpu_info_;
+#endif
+ return (!cpu_info ? InitCpuFlags() : cpu_info) & test_flag;
+}
+
+// Internal function for parsing /proc/cpuinfo.
+LIBYUV_API
+int ArmCpuCaps(const char* cpuinfo_name);
+
+// For testing, allow CPU flags to be disabled.
+// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
+// MaskCpuFlags(-1) to enable all cpu specific optimizations.
+// MaskCpuFlags(1) to disable all cpu specific optimizations.
+// MaskCpuFlags(0) to reset state so next call will auto init.
+// Returns cpu_info flags.
+LIBYUV_API
+int MaskCpuFlags(int enable_flags);
+
+// Sets the CPU flags to |cpu_flags|, bypassing the detection code. |cpu_flags|
+// should be a valid combination of the kCpuHas constants above and include
+// kCpuInitialized. Use this method when running in a sandboxed process where
+// the detection code might fail (as it might access /proc/cpuinfo). In such
+// cases the cpu_info can be obtained from a non sandboxed process by calling
+// InitCpuFlags() and passed to the sandboxed process (via command line
+// parameters, IPC...) which can then call this method to initialize the CPU
+// flags.
+// Notes:
+// - when specifying 0 for |cpu_flags|, the auto initialization is enabled
+// again.
+// - enabling CPU features that are not supported by the CPU will result in
+// undefined behavior.
+// TODO(fbarchard): consider writing a helper function that translates from
+// other library CPU info to libyuv CPU info and add a .md doc that explains
+// CPU detection.
+static __inline void SetCpuFlags(int cpu_flags) {
+ LIBYUV_API extern int cpu_info_;
+#ifdef __ATOMIC_RELAXED
+ __atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED);
+#else
+ cpu_info_ = cpu_flags;
+#endif
+}
+
+// Low level cpuid for X86. Returns zeros on other CPUs.
+// eax is the info type that you want.
+// ecx is typically the cpu number, and should normally be zero.
+LIBYUV_API
+void CpuId(int info_eax, int info_ecx, int* cpu_info);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_CPU_ID_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/macros_msa.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/macros_msa.h
new file mode 100644
index 0000000000..bba0e8aeda
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/macros_msa.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright 2016 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_MACROS_MSA_H_
+#define INCLUDE_LIBYUV_MACROS_MSA_H_
+
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#include <msa.h>
+#include <stdint.h>
+
+#if (__mips_isa_rev >= 6)
+#define LW(psrc) \
+ ({ \
+ const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
+ uint32_t val_m; \
+ asm volatile("lw %[val_m], %[psrc_lw_m] \n" \
+ : [val_m] "=r"(val_m) \
+ : [psrc_lw_m] "m"(*psrc_lw_m)); \
+ val_m; \
+ })
+
+#if (__mips == 64)
+#define LD(psrc) \
+ ({ \
+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
+ uint64_t val_m = 0; \
+ asm volatile("ld %[val_m], %[psrc_ld_m] \n" \
+ : [val_m] "=r"(val_m) \
+ : [psrc_ld_m] "m"(*psrc_ld_m)); \
+ val_m; \
+ })
+#else // !(__mips == 64)
+#define LD(psrc) \
+ ({ \
+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
+ uint32_t val0_m, val1_m; \
+ uint64_t val_m = 0; \
+ val0_m = LW(psrc_ld_m); \
+ val1_m = LW(psrc_ld_m + 4); \
+ val_m = (uint64_t)(val1_m); /* NOLINT */ \
+ val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
+ val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \
+ val_m; \
+ })
+#endif // (__mips == 64)
+
+#define SW(val, pdst) \
+ ({ \
+ uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
+ uint32_t val_m = (val); \
+ asm volatile("sw %[val_m], %[pdst_sw_m] \n" \
+ : [pdst_sw_m] "=m"(*pdst_sw_m) \
+ : [val_m] "r"(val_m)); \
+ })
+
+#if (__mips == 64)
+#define SD(val, pdst) \
+ ({ \
+ uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
+ uint64_t val_m = (val); \
+ asm volatile("sd %[val_m], %[pdst_sd_m] \n" \
+ : [pdst_sd_m] "=m"(*pdst_sd_m) \
+ : [val_m] "r"(val_m)); \
+ })
+#else // !(__mips == 64)
+#define SD(val, pdst) \
+ ({ \
+ uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
+ uint32_t val0_m, val1_m; \
+ val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
+ val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
+ SW(val0_m, pdst_sd_m); \
+ SW(val1_m, pdst_sd_m + 4); \
+ })
+#endif // !(__mips == 64)
+#else // !(__mips_isa_rev >= 6)
+#define LW(psrc) \
+ ({ \
+ const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
+ uint32_t val_m; \
+ asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \
+ : [val_m] "=r"(val_m) \
+ : [psrc_lw_m] "m"(*psrc_lw_m)); \
+ val_m; \
+ })
+
+#if (__mips == 64)
+#define LD(psrc) \
+ ({ \
+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
+ uint64_t val_m = 0; \
+ asm volatile("uld %[val_m], %[psrc_ld_m] \n" \
+ : [val_m] "=r"(val_m) \
+ : [psrc_ld_m] "m"(*psrc_ld_m)); \
+ val_m; \
+ })
+#else // !(__mips == 64)
+#define LD(psrc) \
+ ({ \
+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
+ uint32_t val0_m, val1_m; \
+ uint64_t val_m = 0; \
+ val0_m = LW(psrc_ld_m); \
+ val1_m = LW(psrc_ld_m + 4); \
+ val_m = (uint64_t)(val1_m); /* NOLINT */ \
+ val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
+ val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \
+ val_m; \
+ })
+#endif // (__mips == 64)
+
+#define SW(val, pdst) \
+ ({ \
+ uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
+ uint32_t val_m = (val); \
+ asm volatile("usw %[val_m], %[pdst_sw_m] \n" \
+ : [pdst_sw_m] "=m"(*pdst_sw_m) \
+ : [val_m] "r"(val_m)); \
+ })
+
+#define SD(val, pdst) \
+ ({ \
+ uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
+ uint32_t val0_m, val1_m; \
+ val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
+ val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
+ SW(val0_m, pdst_sd_m); \
+ SW(val1_m, pdst_sd_m + 4); \
+ })
+#endif // (__mips_isa_rev >= 6)
+
+// TODO(fbarchard): Consider removing __VAR_ARGS versions.
+#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
+#define LD_UB(...) LD_B(const v16u8, __VA_ARGS__)
+
+#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
+#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
+
+#define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
+#define ST_UH(...) ST_H(v8u16, __VA_ARGS__)
+
+/* Description : Load two vectors with 16 'byte' sized elements
+ Arguments : Inputs - psrc, stride
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Load 16 byte elements in 'out0' from (psrc)
+ Load 16 byte elements in 'out1' from (psrc + stride)
+*/
+#define LD_B2(RTYPE, psrc, stride, out0, out1) \
+ { \
+ out0 = LD_B(RTYPE, (psrc)); \
+ out1 = LD_B(RTYPE, (psrc) + stride); \
+ }
+#define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__)
+
+#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
+ { \
+ LD_B2(RTYPE, (psrc), stride, out0, out1); \
+ LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
+ }
+#define LD_UB4(...) LD_B4(const v16u8, __VA_ARGS__)
+
+/* Description : Store two vectors with stride each having 16 'byte' sized
+ elements
+ Arguments : Inputs - in0, in1, pdst, stride
+ Details : Store 16 byte elements from 'in0' to (pdst)
+ Store 16 byte elements from 'in1' to (pdst + stride)
+*/
+#define ST_B2(RTYPE, in0, in1, pdst, stride) \
+ { \
+ ST_B(RTYPE, in0, (pdst)); \
+ ST_B(RTYPE, in1, (pdst) + stride); \
+ }
+#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
+
+#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
+ { \
+ ST_B2(RTYPE, in0, in1, (pdst), stride); \
+ ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
+ }
+#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
+
+/* Description : Store vectors of 8 halfword elements with stride
+ Arguments : Inputs - in0, in1, pdst, stride
+ Details : Store 8 halfword elements from 'in0' to (pdst)
+ Store 8 halfword elements from 'in1' to (pdst + stride)
+*/
+#define ST_H2(RTYPE, in0, in1, pdst, stride) \
+ { \
+ ST_H(RTYPE, in0, (pdst)); \
+ ST_H(RTYPE, in1, (pdst) + stride); \
+ }
+#define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__)
+
+// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly.
+/* Description : Shuffle byte vector elements as per mask vector
+ Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Byte elements from 'in0' & 'in1' are copied selectively to
+ 'out0' as per control vector 'mask0'
+*/
+#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
+ { \
+ out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
+ out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \
+ }
+#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
+
+/* Description : Interleave both left and right half of input vectors
+ Arguments : Inputs - in0, in1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Right half of byte elements from 'in0' and 'in1' are
+ interleaved and written to 'out0'
+*/
+#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
+ { \
+ out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
+ out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
+ }
+#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
+
+#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
+
+#endif // INCLUDE_LIBYUV_MACROS_MSA_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/mjpeg_decoder.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/mjpeg_decoder.h
new file mode 100644
index 0000000000..275f8d4c18
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/mjpeg_decoder.h
@@ -0,0 +1,195 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_
+#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+// NOTE: For a simplified public API use convert.h MJPGToI420().
+
+struct jpeg_common_struct;
+struct jpeg_decompress_struct;
+struct jpeg_source_mgr;
+
+namespace libyuv {
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+static const uint32_t kUnknownDataSize = 0xFFFFFFFF;
+
+enum JpegSubsamplingType {
+ kJpegYuv420,
+ kJpegYuv422,
+ kJpegYuv444,
+ kJpegYuv400,
+ kJpegUnknown
+};
+
+struct Buffer {
+ const uint8_t* data;
+ int len;
+};
+
+struct BufferVector {
+ Buffer* buffers;
+ int len;
+ int pos;
+};
+
+struct SetJmpErrorMgr;
+
+// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
+// simply independent JPEG images with a fixed huffman table (which is omitted).
+// It is rarely used in video transmission, but is common as a camera capture
+// format, especially in Logitech devices. This class implements a decoder for
+// MJPEG frames.
+//
+// See http://tools.ietf.org/html/rfc2435
+class LIBYUV_API MJpegDecoder {
+ public:
+ typedef void (*CallbackFunction)(void* opaque,
+ const uint8_t* const* data,
+ const int* strides,
+ int rows);
+
+ static const int kColorSpaceUnknown;
+ static const int kColorSpaceGrayscale;
+ static const int kColorSpaceRgb;
+ static const int kColorSpaceYCbCr;
+ static const int kColorSpaceCMYK;
+ static const int kColorSpaceYCCK;
+
+ MJpegDecoder();
+ ~MJpegDecoder();
+
+ // Loads a new frame, reads its headers, and determines the uncompressed
+ // image format.
+ // Returns LIBYUV_TRUE if image looks valid and format is supported.
+ // If return value is LIBYUV_TRUE, then the values for all the following
+ // getters are populated.
+ // src_len is the size of the compressed mjpeg frame in bytes.
+ LIBYUV_BOOL LoadFrame(const uint8_t* src, size_t src_len);
+
+ // Returns width of the last loaded frame in pixels.
+ int GetWidth();
+
+ // Returns height of the last loaded frame in pixels.
+ int GetHeight();
+
+ // Returns format of the last loaded frame. The return value is one of the
+ // kColorSpace* constants.
+ int GetColorSpace();
+
+ // Number of color components in the color space.
+ int GetNumComponents();
+
+ // Sample factors of the n-th component.
+ int GetHorizSampFactor(int component);
+
+ int GetVertSampFactor(int component);
+
+ int GetHorizSubSampFactor(int component);
+
+ int GetVertSubSampFactor(int component);
+
+ // Public for testability.
+ int GetImageScanlinesPerImcuRow();
+
+ // Public for testability.
+ int GetComponentScanlinesPerImcuRow(int component);
+
+ // Width of a component in bytes.
+ int GetComponentWidth(int component);
+
+ // Height of a component.
+ int GetComponentHeight(int component);
+
+ // Width of a component in bytes with padding for DCTSIZE. Public for testing.
+ int GetComponentStride(int component);
+
+ // Size of a component in bytes.
+ int GetComponentSize(int component);
+
+ // Call this after LoadFrame() if you decide you don't want to decode it
+ // after all.
+ LIBYUV_BOOL UnloadFrame();
+
+ // Decodes the entire image into a one-buffer-per-color-component format.
+ // dst_width must match exactly. dst_height must be <= to image height; if
+ // less, the image is cropped. "planes" must have size equal to at least
+ // GetNumComponents() and they must point to non-overlapping buffers of size
+ // at least GetComponentSize(i). The pointers in planes are incremented
+ // to point to after the end of the written data.
+ // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
+ LIBYUV_BOOL DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height);
+
+ // Decodes the entire image and passes the data via repeated calls to a
+ // callback function. Each call will get the data for a whole number of
+ // image scanlines.
+ // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
+ LIBYUV_BOOL DecodeToCallback(CallbackFunction fn,
+ void* opaque,
+ int dst_width,
+ int dst_height);
+
+ // The helper function which recognizes the jpeg sub-sampling type.
+ static JpegSubsamplingType JpegSubsamplingTypeHelper(
+ int* subsample_x,
+ int* subsample_y,
+ int number_of_components);
+
+ private:
+ void AllocOutputBuffers(int num_outbufs);
+ void DestroyOutputBuffers();
+
+ LIBYUV_BOOL StartDecode();
+ LIBYUV_BOOL FinishDecode();
+
+ void SetScanlinePointers(uint8_t** data);
+ LIBYUV_BOOL DecodeImcuRow();
+
+ int GetComponentScanlinePadding(int component);
+
+ // A buffer holding the input data for a frame.
+ Buffer buf_;
+ BufferVector buf_vec_;
+
+ jpeg_decompress_struct* decompress_struct_;
+ jpeg_source_mgr* source_mgr_;
+ SetJmpErrorMgr* error_mgr_;
+
+ // LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
+ // GetComponentScanlinePadding() != 0.)
+ LIBYUV_BOOL has_scanline_padding_;
+
+ // Temporaries used to point to scanline outputs.
+ int num_outbufs_; // Outermost size of all arrays below.
+ uint8_t*** scanlines_;
+ int* scanlines_sizes_;
+ // Temporary buffer used for decoding when we can't decode directly to the
+ // output buffers. Large enough for just one iMCU row.
+ uint8_t** databuf_;
+ int* databuf_strides_;
+};
+
+} // namespace libyuv
+
+#endif // __cplusplus
+#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/planar_functions.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/planar_functions.h
new file mode 100644
index 0000000000..91137baba2
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/planar_functions.h
@@ -0,0 +1,847 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
+#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
+
+#include "libyuv/basic_types.h"
+
+// TODO(fbarchard): Remove the following headers includes.
+#include "libyuv/convert.h"
+#include "libyuv/convert_argb.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// TODO(fbarchard): Move cpu macros to row.h
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__native_client__) && defined(__x86_64__)) || \
+ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
+#define LIBYUV_DISABLE_X86
+#endif
+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define LIBYUV_DISABLE_X86
+#endif
+#endif
+// The following are available on all x86 platforms:
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+#define HAS_ARGBAFFINEROW_SSE2
+#endif
+
+// Copy a plane of data.
+LIBYUV_API
+void CopyPlane(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height);
+
+LIBYUV_API
+void CopyPlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height);
+
+LIBYUV_API
+void Convert16To8Plane(const uint16_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int scale, // 16384 for 10 bits
+ int width,
+ int height);
+
+LIBYUV_API
+void Convert8To16Plane(const uint8_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int scale, // 1024 for 10 bits
+ int width,
+ int height);
+
+// Set a plane of data to a 32 bit value.
+LIBYUV_API
+void SetPlane(uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ uint32_t value);
+
+// Split interleaved UV plane into separate U and V planes.
+LIBYUV_API
+void SplitUVPlane(const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Merge separate U and V planes into one interleaved UV plane.
+LIBYUV_API
+void MergeUVPlane(const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
+// Split interleaved RGB plane into separate R, G and B planes.
+LIBYUV_API
+void SplitRGBPlane(const uint8_t* src_rgb,
+ int src_stride_rgb,
+ uint8_t* dst_r,
+ int dst_stride_r,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height);
+
+// Merge separate R, G and B planes into one interleaved RGB plane.
+LIBYUV_API
+void MergeRGBPlane(const uint8_t* src_r,
+ int src_stride_r,
+ const uint8_t* src_g,
+ int src_stride_g,
+ const uint8_t* src_b,
+ int src_stride_b,
+ uint8_t* dst_rgb,
+ int dst_stride_rgb,
+ int width,
+ int height);
+
+// Copy I400. Supports inverting.
+LIBYUV_API
+int I400ToI400(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height);
+
+#define J400ToJ400 I400ToI400
+
+// Copy I422 to I422.
+#define I422ToI422 I422Copy
+LIBYUV_API
+int I422Copy(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Copy I444 to I444.
+#define I444ToI444 I444Copy
+LIBYUV_API
+int I444Copy(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert YUY2 to I422.
+LIBYUV_API
+int YUY2ToI422(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert UYVY to I422.
+LIBYUV_API
+int UYVYToI422(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+LIBYUV_API
+int YUY2ToNV12(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
+LIBYUV_API
+int UYVYToNV12(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
+LIBYUV_API
+int YUY2ToY(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height);
+
+// Convert I420 to I400. (calls CopyPlane ignoring u/v).
+LIBYUV_API
+int I420ToI400(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height);
+
+// Alias
+#define J420ToJ400 I420ToI400
+#define I420ToI420Mirror I420Mirror
+
+// I420 mirror.
+LIBYUV_API
+int I420Mirror(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Alias
+#define I400ToI400Mirror I400Mirror
+
+// I400 mirror. A single plane is mirrored horizontally.
+// Pass negative height to achieve 180 degree rotation.
+LIBYUV_API
+int I400Mirror(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height);
+
+// Alias
+#define ARGBToARGBMirror ARGBMirror
+
+// ARGB mirror.
+LIBYUV_API
+int ARGBMirror(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert NV12 to RGB565.
+LIBYUV_API
+int NV12ToRGB565(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ int width,
+ int height);
+
+// I422ToARGB is in convert_argb.h
+// Convert I422 to BGRA.
+LIBYUV_API
+int I422ToBGRA(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_bgra,
+ int dst_stride_bgra,
+ int width,
+ int height);
+
+// Convert I422 to ABGR.
+LIBYUV_API
+int I422ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height);
+
+// Convert I422 to RGBA.
+LIBYUV_API
+int I422ToRGBA(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgba,
+ int dst_stride_rgba,
+ int width,
+ int height);
+
+// Alias
+#define RGB24ToRAW RAWToRGB24
+
+LIBYUV_API
+int RAWToRGB24(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height);
+
+// Draw a rectangle into I420.
+LIBYUV_API
+int I420Rect(uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int x,
+ int y,
+ int width,
+ int height,
+ int value_y,
+ int value_u,
+ int value_v);
+
+// Draw a rectangle into ARGB.
+LIBYUV_API
+int ARGBRect(uint8_t* dst_argb,
+ int dst_stride_argb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height,
+ uint32_t value);
+
+// Convert ARGB to gray scale ARGB.
+LIBYUV_API
+int ARGBGrayTo(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Make a rectangle of ARGB gray scale.
+LIBYUV_API
+int ARGBGray(uint8_t* dst_argb,
+ int dst_stride_argb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height);
+
+// Make a rectangle of ARGB Sepia tone.
+LIBYUV_API
+int ARGBSepia(uint8_t* dst_argb,
+ int dst_stride_argb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height);
+
+// Apply a matrix rotation to each ARGB pixel.
+// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2.
+// The first 4 coefficients apply to B, G, R, A and produce B of the output.
+// The next 4 coefficients apply to B, G, R, A and produce G of the output.
+// The next 4 coefficients apply to B, G, R, A and produce R of the output.
+// The last 4 coefficients apply to B, G, R, A and produce A of the output.
+LIBYUV_API
+int ARGBColorMatrix(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const int8_t* matrix_argb,
+ int width,
+ int height);
+
+// Deprecated. Use ARGBColorMatrix instead.
+// Apply a matrix rotation to each ARGB pixel.
+// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
+// The first 4 coefficients apply to B, G, R, A and produce B of the output.
+// The next 4 coefficients apply to B, G, R, A and produce G of the output.
+// The last 4 coefficients apply to B, G, R, A and produce R of the output.
+LIBYUV_API
+int RGBColorMatrix(uint8_t* dst_argb,
+ int dst_stride_argb,
+ const int8_t* matrix_rgb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height);
+
+// Apply a color table each ARGB pixel.
+// Table contains 256 ARGB values.
+LIBYUV_API
+int ARGBColorTable(uint8_t* dst_argb,
+ int dst_stride_argb,
+ const uint8_t* table_argb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height);
+
+// Apply a color table each ARGB pixel but preserve destination alpha.
+// Table contains 256 ARGB values.
+LIBYUV_API
+int RGBColorTable(uint8_t* dst_argb,
+ int dst_stride_argb,
+ const uint8_t* table_argb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height);
+
+// Apply a luma/color table each ARGB pixel but preserve destination alpha.
+// Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from
+// RGB (YJ style) and C is an 8 bit color component (R, G or B).
+LIBYUV_API
+int ARGBLumaColorTable(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const uint8_t* luma,
+ int width,
+ int height);
+
+// Apply a 3 term polynomial to ARGB values.
+// poly points to a 4x4 matrix. The first row is constants. The 2nd row is
+// coefficients for b, g, r and a. The 3rd row is coefficients for b squared,
+// g squared, r squared and a squared. The 4rd row is coefficients for b to
+// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and
+// result clamped to 0 to 255.
+// A polynomial approximation can be dirived using software such as 'R'.
+
+LIBYUV_API
+int ARGBPolynomial(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const float* poly,
+ int width,
+ int height);
+
+// Convert plane of 16 bit shorts to half floats.
+// Source values are multiplied by scale before storing as half float.
+LIBYUV_API
+int HalfFloatPlane(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ float scale,
+ int width,
+ int height);
+
+// Convert a buffer of bytes to floats, scale the values and store as floats.
+LIBYUV_API
+int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width);
+
+// Quantize a rectangle of ARGB. Alpha unaffected.
+// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
+// interval_size should be a value between 1 and 255.
+// interval_offset should be a value between 0 and 255.
+LIBYUV_API
+int ARGBQuantize(uint8_t* dst_argb,
+ int dst_stride_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height);
+
+// Copy ARGB to ARGB.
+LIBYUV_API
+int ARGBCopy(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Copy Alpha channel of ARGB to alpha of ARGB.
+LIBYUV_API
+int ARGBCopyAlpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Extract the alpha channel from ARGB.
+LIBYUV_API
+int ARGBExtractAlpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height);
+
+// Copy Y channel to Alpha of ARGB.
+LIBYUV_API
+int ARGBCopyYToAlpha(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+typedef void (*ARGBBlendRow)(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+
+// Get function to Alpha Blend ARGB pixels and store to destination.
+LIBYUV_API
+ARGBBlendRow GetARGBBlend();
+
+// Alpha Blend ARGB images and store to destination.
+// Source is pre-multiplied by alpha using ARGBAttenuate.
+// Alpha of destination is set to 255.
+LIBYUV_API
+int ARGBBlend(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Alpha Blend plane and store to destination.
+// Source is not pre-multiplied by alpha.
+LIBYUV_API
+int BlendPlane(const uint8_t* src_y0,
+ int src_stride_y0,
+ const uint8_t* src_y1,
+ int src_stride_y1,
+ const uint8_t* alpha,
+ int alpha_stride,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height);
+
+// Alpha Blend YUV images and store to destination.
+// Source is not pre-multiplied by alpha.
+// Alpha is full width x height and subsampled to half size to apply to UV.
+LIBYUV_API
+int I420Blend(const uint8_t* src_y0,
+ int src_stride_y0,
+ const uint8_t* src_u0,
+ int src_stride_u0,
+ const uint8_t* src_v0,
+ int src_stride_v0,
+ const uint8_t* src_y1,
+ int src_stride_y1,
+ const uint8_t* src_u1,
+ int src_stride_u1,
+ const uint8_t* src_v1,
+ int src_stride_v1,
+ const uint8_t* alpha,
+ int alpha_stride,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
+LIBYUV_API
+int ARGBMultiply(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Add ARGB image with ARGB image. Saturates to 255.
+LIBYUV_API
+int ARGBAdd(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
+LIBYUV_API
+int ARGBSubtract(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert I422 to YUY2.
+LIBYUV_API
+int I422ToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height);
+
+// Convert I422 to UYVY.
+LIBYUV_API
+int I422ToUYVY(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_uyvy,
+ int dst_stride_uyvy,
+ int width,
+ int height);
+
+// Convert unattentuated ARGB to preattenuated ARGB.
+LIBYUV_API
+int ARGBAttenuate(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Convert preattentuated ARGB to unattenuated ARGB.
+LIBYUV_API
+int ARGBUnattenuate(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Internal function - do not call directly.
+// Computes table of cumulative sum for image where the value is the sum
+// of all values above and to the left of the entry. Used by ARGBBlur.
+LIBYUV_API
+int ARGBComputeCumulativeSum(const uint8_t* src_argb,
+ int src_stride_argb,
+ int32_t* dst_cumsum,
+ int dst_stride32_cumsum,
+ int width,
+ int height);
+
+// Blur ARGB image.
+// dst_cumsum table of width * (height + 1) * 16 bytes aligned to
+// 16 byte boundary.
+// dst_stride32_cumsum is number of ints in a row (width * 4).
+// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5.
+// Blur is optimized for radius of 5 (11x11) or less.
+LIBYUV_API
+int ARGBBlur(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int32_t* dst_cumsum,
+ int dst_stride32_cumsum,
+ int width,
+ int height,
+ int radius);
+
+// Multiply ARGB image by ARGB value.
+LIBYUV_API
+int ARGBShade(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height,
+ uint32_t value);
+
+// Interpolate between two images using specified amount of interpolation
+// (0 to 255) and store to destination.
+// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
+// and 255 means 1% src0 and 99% src1.
+LIBYUV_API
+int InterpolatePlane(const uint8_t* src0,
+ int src_stride0,
+ const uint8_t* src1,
+ int src_stride1,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height,
+ int interpolation);
+
+// Interpolate between two ARGB images using specified amount of interpolation
+// Internally calls InterpolatePlane with width * 4 (bpp).
+LIBYUV_API
+int ARGBInterpolate(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height,
+ int interpolation);
+
+// Interpolate between two YUV images using specified amount of interpolation
+// Internally calls InterpolatePlane on each plane where the U and V planes
+// are half width and half height.
+LIBYUV_API
+int I420Interpolate(const uint8_t* src0_y,
+ int src0_stride_y,
+ const uint8_t* src0_u,
+ int src0_stride_u,
+ const uint8_t* src0_v,
+ int src0_stride_v,
+ const uint8_t* src1_y,
+ int src1_stride_y,
+ const uint8_t* src1_u,
+ int src1_stride_u,
+ const uint8_t* src1_v,
+ int src1_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int interpolation);
+
+// Row function for copying pixels from a source with a slope to a row
+// of destination. Useful for scaling, rotation, mirror, texture mapping.
+LIBYUV_API
+void ARGBAffineRow_C(const uint8_t* src_argb,
+ int src_argb_stride,
+ uint8_t* dst_argb,
+ const float* uv_dudv,
+ int width);
+// TODO(fbarchard): Move ARGBAffineRow_SSE2 to row.h
+LIBYUV_API
+void ARGBAffineRow_SSE2(const uint8_t* src_argb,
+ int src_argb_stride,
+ uint8_t* dst_argb,
+ const float* uv_dudv,
+ int width);
+
+// Shuffle ARGB channel order. e.g. BGRA to ARGB.
+// shuffler is 16 bytes and must be aligned.
+LIBYUV_API
+int ARGBShuffle(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const uint8_t* shuffler,
+ int width,
+ int height);
+
+// Sobel ARGB effect with planar output.
+LIBYUV_API
+int ARGBSobelToPlane(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height);
+
+// Sobel ARGB effect.
+LIBYUV_API
+int ARGBSobel(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
+LIBYUV_API
+int ARGBSobelXY(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate.h
new file mode 100644
index 0000000000..76b692be8b
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_ROTATE_H_
+#define INCLUDE_LIBYUV_ROTATE_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Supported rotation.
+typedef enum RotationMode {
+ kRotate0 = 0, // No rotation.
+ kRotate90 = 90, // Rotate 90 degrees clockwise.
+ kRotate180 = 180, // Rotate 180 degrees.
+ kRotate270 = 270, // Rotate 270 degrees clockwise.
+
+ // Deprecated.
+ kRotateNone = 0,
+ kRotateClockwise = 90,
+ kRotateCounterClockwise = 270,
+} RotationModeEnum;
+
+// Rotate I420 frame.
+LIBYUV_API
+int I420Rotate(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode);
+
+// Rotate NV12 input and store in I420.
+LIBYUV_API
+int NV12ToI420Rotate(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode);
+
+// Rotate a plane by 0, 90, 180, or 270.
+LIBYUV_API
+int RotatePlane(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height,
+ enum RotationMode mode);
+
+// Rotate planes by 90, 180, 270. Deprecated.
+LIBYUV_API
+void RotatePlane90(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height);
+
+LIBYUV_API
+void RotatePlane180(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height);
+
+LIBYUV_API
+void RotatePlane270(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height);
+
+LIBYUV_API
+void RotateUV90(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height);
+
+// Rotations for when U and V are interleaved.
+// These functions take one input pointer and
+// split the data into two buffers while
+// rotating them. Deprecated.
+LIBYUV_API
+void RotateUV180(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height);
+
+LIBYUV_API
+void RotateUV270(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height);
+
+// The 90 and 270 functions are based on transposes.
+// Doing a transpose with reversing the read/write
+// order will result in a rotation by +- 90 degrees.
+// Deprecated.
+LIBYUV_API
+void TransposePlane(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height);
+
+LIBYUV_API
+void TransposeUV(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_ROTATE_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_argb.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_argb.h
new file mode 100644
index 0000000000..20432949ab
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_argb.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_
+#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
+
+#include "libyuv/basic_types.h"
+#include "libyuv/rotate.h" // For RotationMode.
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Rotate ARGB frame
+LIBYUV_API
+int ARGBRotate(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int src_width,
+ int src_height,
+ enum RotationMode mode);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_row.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_row.h
new file mode 100644
index 0000000000..5edc0fcf13
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_row.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_
+#define INCLUDE_LIBYUV_ROTATE_ROW_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__native_client__) && defined(__x86_64__)) || \
+ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
+#define LIBYUV_DISABLE_X86
+#endif
+#if defined(__native_client__)
+#define LIBYUV_DISABLE_NEON
+#endif
+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define LIBYUV_DISABLE_X86
+#endif
+#endif
+// The following are available for Visual C and clangcl 32 bit:
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+#define HAS_TRANSPOSEWX8_SSSE3
+#define HAS_TRANSPOSEUVWX8_SSE2
+#endif
+
+// The following are available for GCC 32 or 64 bit:
+#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
+#define HAS_TRANSPOSEWX8_SSSE3
+#endif
+
+// The following are available for 64 bit GCC:
+#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__)
+#define HAS_TRANSPOSEWX8_FAST_SSSE3
+#define HAS_TRANSPOSEUVWX8_SSE2
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && \
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
+#define HAS_TRANSPOSEWX8_NEON
+#define HAS_TRANSPOSEUVWX8_NEON
+#endif
+
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#define HAS_TRANSPOSEWX16_MSA
+#define HAS_TRANSPOSEUVWX16_MSA
+#endif
+
+void TransposeWxH_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height);
+
+void TransposeWx8_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+void TransposeWx16_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+void TransposeWx8_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+void TransposeWx8_SSSE3(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+void TransposeWx8_Fast_SSSE3(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+void TransposeWx16_MSA(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+
+void TransposeWx8_Any_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+void TransposeWx8_Any_SSSE3(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+void TransposeWx16_Any_MSA(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+
+void TransposeUVWxH_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height);
+
+void TransposeUVWx8_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width);
+void TransposeUVWx16_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width);
+void TransposeUVWx8_SSE2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width);
+void TransposeUVWx8_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width);
+void TransposeUVWx16_MSA(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width);
+
+void TransposeUVWx8_Any_SSE2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width);
+void TransposeUVWx8_Any_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width);
+void TransposeUVWx16_Any_MSA(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_ROTATE_ROW_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/row.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/row.h
new file mode 100644
index 0000000000..65ef448b8c
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/row.h
@@ -0,0 +1,3471 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_ROW_H_
+#define INCLUDE_LIBYUV_ROW_H_
+
+#include <stdlib.h> // For malloc.
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__native_client__) && defined(__x86_64__)) || \
+ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
+#define LIBYUV_DISABLE_X86
+#endif
+#if defined(__native_client__)
+#define LIBYUV_DISABLE_NEON
+#endif
+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define LIBYUV_DISABLE_X86
+#endif
+#endif
+// clang >= 3.5.0 required for Arm64.
+#if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
+#if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5))
+#define LIBYUV_DISABLE_NEON
+#endif // clang >= 3.5
+#endif // __clang__
+
+// GCC >= 4.7.0 required for AVX2.
+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
+#define GCC_HAS_AVX2 1
+#endif // GNUC >= 4.7
+#endif // __GNUC__
+
+// clang >= 3.4.0 required for AVX2.
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
+#define CLANG_HAS_AVX2 1
+#endif // clang >= 3.4
+#endif // __clang__
+
+// clang >= 6.0.0 required for AVX512.
+// TODO(fbarchard): fix xcode 9 ios b/789.
+#if 0 // Build fails in libvpx on Mac
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+#if (__clang_major__ >= 7) && !defined(__APPLE_EMBEDDED_SIMULATOR__)
+#define CLANG_HAS_AVX512 1
+#endif // clang >= 7
+#endif // __clang__
+#endif // 0
+
+// Visual C 2012 required for AVX2.
+#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
+ _MSC_VER >= 1700
+#define VISUALC_HAS_AVX2 1
+#endif // VisualStudio >= 2012
+
+// The following are available on all x86 platforms:
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+// Conversions:
+#define HAS_ABGRTOUVROW_SSSE3
+#define HAS_ABGRTOYROW_SSSE3
+#define HAS_ARGB1555TOARGBROW_SSE2
+#define HAS_ARGB4444TOARGBROW_SSE2
+#define HAS_ARGBEXTRACTALPHAROW_SSE2
+#define HAS_ARGBSETROW_X86
+#define HAS_ARGBSHUFFLEROW_SSSE3
+#define HAS_ARGBTOARGB1555ROW_SSE2
+#define HAS_ARGBTOARGB4444ROW_SSE2
+#define HAS_ARGBTORAWROW_SSSE3
+#define HAS_ARGBTORGB24ROW_SSSE3
+#define HAS_ARGBTORGB565DITHERROW_SSE2
+#define HAS_ARGBTORGB565ROW_SSE2
+#define HAS_ARGBTOUV444ROW_SSSE3
+#define HAS_ARGBTOUVJROW_SSSE3
+#define HAS_ARGBTOUVROW_SSSE3
+#define HAS_ARGBTOYJROW_SSSE3
+#define HAS_ARGBTOYROW_SSSE3
+#define HAS_BGRATOUVROW_SSSE3
+#define HAS_BGRATOYROW_SSSE3
+#define HAS_COPYROW_ERMS
+#define HAS_COPYROW_SSE2
+#define HAS_H422TOARGBROW_SSSE3
+#define HAS_HALFFLOATROW_SSE2
+#define HAS_I400TOARGBROW_SSE2
+#define HAS_I422TOARGB1555ROW_SSSE3
+#define HAS_I422TOARGB4444ROW_SSSE3
+#define HAS_I422TOARGBROW_SSSE3
+#define HAS_I422TORGB24ROW_SSSE3
+#define HAS_I422TORGB565ROW_SSSE3
+#define HAS_I422TORGBAROW_SSSE3
+#define HAS_I422TOUYVYROW_SSE2
+#define HAS_I422TOYUY2ROW_SSE2
+#define HAS_I444TOARGBROW_SSSE3
+#define HAS_J400TOARGBROW_SSE2
+#define HAS_J422TOARGBROW_SSSE3
+#define HAS_MERGEUVROW_SSE2
+#define HAS_MIRRORROW_SSSE3
+#define HAS_MIRRORUVROW_SSSE3
+#define HAS_NV12TOARGBROW_SSSE3
+#define HAS_NV12TORGB24ROW_SSSE3
+#define HAS_NV12TORGB565ROW_SSSE3
+#define HAS_NV21TOARGBROW_SSSE3
+#define HAS_NV21TORGB24ROW_SSSE3
+#define HAS_RAWTOARGBROW_SSSE3
+#define HAS_RAWTORGB24ROW_SSSE3
+#define HAS_RAWTOYROW_SSSE3
+#define HAS_RGB24TOARGBROW_SSSE3
+#define HAS_RGB24TOYROW_SSSE3
+#define HAS_RGB565TOARGBROW_SSE2
+#define HAS_RGBATOUVROW_SSSE3
+#define HAS_RGBATOYROW_SSSE3
+#define HAS_SETROW_ERMS
+#define HAS_SETROW_X86
+#define HAS_SPLITUVROW_SSE2
+#define HAS_UYVYTOARGBROW_SSSE3
+#define HAS_UYVYTOUV422ROW_SSE2
+#define HAS_UYVYTOUVROW_SSE2
+#define HAS_UYVYTOYROW_SSE2
+#define HAS_YUY2TOARGBROW_SSSE3
+#define HAS_YUY2TOUV422ROW_SSE2
+#define HAS_YUY2TOUVROW_SSE2
+#define HAS_YUY2TOYROW_SSE2
+
+// Effects:
+#define HAS_ARGBADDROW_SSE2
+#define HAS_ARGBAFFINEROW_SSE2
+#define HAS_ARGBATTENUATEROW_SSSE3
+#define HAS_ARGBBLENDROW_SSSE3
+#define HAS_ARGBCOLORMATRIXROW_SSSE3
+#define HAS_ARGBCOLORTABLEROW_X86
+#define HAS_ARGBCOPYALPHAROW_SSE2
+#define HAS_ARGBCOPYYTOALPHAROW_SSE2
+#define HAS_ARGBGRAYROW_SSSE3
+#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
+#define HAS_ARGBMIRRORROW_SSE2
+#define HAS_ARGBMULTIPLYROW_SSE2
+#define HAS_ARGBPOLYNOMIALROW_SSE2
+#define HAS_ARGBQUANTIZEROW_SSE2
+#define HAS_ARGBSEPIAROW_SSSE3
+#define HAS_ARGBSHADEROW_SSE2
+#define HAS_ARGBSUBTRACTROW_SSE2
+#define HAS_ARGBUNATTENUATEROW_SSE2
+#define HAS_BLENDPLANEROW_SSSE3
+#define HAS_COMPUTECUMULATIVESUMROW_SSE2
+#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+#define HAS_INTERPOLATEROW_SSSE3
+#define HAS_RGBCOLORTABLEROW_X86
+#define HAS_SOBELROW_SSE2
+#define HAS_SOBELTOPLANEROW_SSE2
+#define HAS_SOBELXROW_SSE2
+#define HAS_SOBELXYROW_SSE2
+#define HAS_SOBELYROW_SSE2
+
+// The following functions fail on gcc/clang 32 bit with fpic and framepointer.
+// caveat: clangcl uses row_win.cc which works.
+#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
+ defined(_MSC_VER)
+// TODO(fbarchard): fix build error on android_full_debug=1
+// https://code.google.com/p/libyuv/issues/detail?id=517
+#define HAS_I422ALPHATOARGBROW_SSSE3
+#endif
+#endif
+
+// The following are available on all x86 platforms, but
+// require VS2012, clang 3.4 or gcc 4.7.
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
+ defined(GCC_HAS_AVX2))
+#define HAS_ARGBCOPYALPHAROW_AVX2
+#define HAS_ARGBCOPYYTOALPHAROW_AVX2
+#define HAS_ARGBEXTRACTALPHAROW_AVX2
+#define HAS_ARGBMIRRORROW_AVX2
+#define HAS_ARGBPOLYNOMIALROW_AVX2
+#define HAS_ARGBSHUFFLEROW_AVX2
+#define HAS_ARGBTORGB565DITHERROW_AVX2
+#define HAS_ARGBTOUVJROW_AVX2
+#define HAS_ARGBTOUVROW_AVX2
+#define HAS_ARGBTOYJROW_AVX2
+#define HAS_ARGBTOYROW_AVX2
+#define HAS_COPYROW_AVX
+#define HAS_H422TOARGBROW_AVX2
+#define HAS_HALFFLOATROW_AVX2
+// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast
+#define HAS_I400TOARGBROW_AVX2
+#define HAS_I422TOARGB1555ROW_AVX2
+#define HAS_I422TOARGB4444ROW_AVX2
+#define HAS_I422TOARGBROW_AVX2
+#define HAS_I422TORGB24ROW_AVX2
+#define HAS_I422TORGB565ROW_AVX2
+#define HAS_I422TORGBAROW_AVX2
+#define HAS_I444TOARGBROW_AVX2
+#define HAS_INTERPOLATEROW_AVX2
+#define HAS_J422TOARGBROW_AVX2
+#define HAS_MERGEUVROW_AVX2
+#define HAS_MIRRORROW_AVX2
+#define HAS_NV12TOARGBROW_AVX2
+#define HAS_NV12TORGB24ROW_AVX2
+#define HAS_NV12TORGB565ROW_AVX2
+#define HAS_NV21TOARGBROW_AVX2
+#define HAS_NV21TORGB24ROW_AVX2
+#define HAS_SPLITUVROW_AVX2
+#define HAS_UYVYTOARGBROW_AVX2
+#define HAS_UYVYTOUV422ROW_AVX2
+#define HAS_UYVYTOUVROW_AVX2
+#define HAS_UYVYTOYROW_AVX2
+#define HAS_YUY2TOARGBROW_AVX2
+#define HAS_YUY2TOUV422ROW_AVX2
+#define HAS_YUY2TOUVROW_AVX2
+#define HAS_YUY2TOYROW_AVX2
+
+// Effects:
+#define HAS_ARGBADDROW_AVX2
+#define HAS_ARGBATTENUATEROW_AVX2
+#define HAS_ARGBMULTIPLYROW_AVX2
+#define HAS_ARGBSUBTRACTROW_AVX2
+#define HAS_ARGBUNATTENUATEROW_AVX2
+#define HAS_BLENDPLANEROW_AVX2
+
+#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
+ defined(_MSC_VER)
+// TODO(fbarchard): fix build error on android_full_debug=1
+// https://code.google.com/p/libyuv/issues/detail?id=517
+#define HAS_I422ALPHATOARGBROW_AVX2
+#endif
+#endif
+
+// The following are available for AVX2 Visual C and clangcl 32 bit:
+// TODO(fbarchard): Port to gcc.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \
+ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
+#define HAS_ARGB1555TOARGBROW_AVX2
+#define HAS_ARGB4444TOARGBROW_AVX2
+#define HAS_ARGBTOARGB1555ROW_AVX2
+#define HAS_ARGBTOARGB4444ROW_AVX2
+#define HAS_ARGBTORGB565ROW_AVX2
+#define HAS_J400TOARGBROW_AVX2
+#define HAS_RGB565TOARGBROW_AVX2
+#endif
+
+// The following are also available on x64 Visual C.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_X64) && \
+ (!defined(__clang__) || defined(__SSSE3__))
+#define HAS_I422ALPHATOARGBROW_SSSE3
+#define HAS_I422TOARGBROW_SSSE3
+#endif
+
+// The following are available for gcc/clang x86 platforms:
+// TODO(fbarchard): Port to Visual C
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
+#define HAS_ABGRTOAR30ROW_SSSE3
+#define HAS_ARGBTOAR30ROW_SSSE3
+#define HAS_CONVERT16TO8ROW_SSSE3
+#define HAS_CONVERT8TO16ROW_SSE2
+// I210 is for H010. 2 = 422. I for 601 vs H for 709.
+#define HAS_I210TOAR30ROW_SSSE3
+#define HAS_I210TOARGBROW_SSSE3
+#define HAS_I422TOAR30ROW_SSSE3
+#define HAS_MERGERGBROW_SSSE3
+#define HAS_SPLITRGBROW_SSSE3
+#endif
+
+// The following are available for AVX2 gcc/clang x86 platforms:
+// TODO(fbarchard): Port to Visual C
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
+ (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
+#define HAS_ABGRTOAR30ROW_AVX2
+#define HAS_ARGBTOAR30ROW_AVX2
+#define HAS_ARGBTORAWROW_AVX2
+#define HAS_ARGBTORGB24ROW_AVX2
+#define HAS_CONVERT16TO8ROW_AVX2
+#define HAS_CONVERT8TO16ROW_AVX2
+#define HAS_I210TOAR30ROW_AVX2
+#define HAS_I210TOARGBROW_AVX2
+#define HAS_I422TOAR30ROW_AVX2
+#define HAS_I422TOUYVYROW_AVX2
+#define HAS_I422TOYUY2ROW_AVX2
+#define HAS_MERGEUVROW_16_AVX2
+#define HAS_MULTIPLYROW_16_AVX2
+#endif
+
+// The following are available for AVX512 clang x86 platforms:
+// TODO(fbarchard): Port to GCC and Visual C
+// TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
+ (defined(CLANG_HAS_AVX512))
+#define HAS_ARGBTORGB24ROW_AVX512VBMI
+#endif
+
+// The following are available on Neon platforms:
+#if !defined(LIBYUV_DISABLE_NEON) && \
+ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+#define HAS_ABGRTOUVROW_NEON
+#define HAS_ABGRTOYROW_NEON
+#define HAS_ARGB1555TOARGBROW_NEON
+#define HAS_ARGB1555TOUVROW_NEON
+#define HAS_ARGB1555TOYROW_NEON
+#define HAS_ARGB4444TOARGBROW_NEON
+#define HAS_ARGB4444TOUVROW_NEON
+#define HAS_ARGB4444TOYROW_NEON
+#define HAS_ARGBEXTRACTALPHAROW_NEON
+#define HAS_ARGBSETROW_NEON
+#define HAS_ARGBTOARGB1555ROW_NEON
+#define HAS_ARGBTOARGB4444ROW_NEON
+#define HAS_ARGBTORAWROW_NEON
+#define HAS_ARGBTORGB24ROW_NEON
+#define HAS_ARGBTORGB565DITHERROW_NEON
+#define HAS_ARGBTORGB565ROW_NEON
+#define HAS_ARGBTOUV444ROW_NEON
+#define HAS_ARGBTOUVJROW_NEON
+#define HAS_ARGBTOUVROW_NEON
+#define HAS_ARGBTOYJROW_NEON
+#define HAS_ARGBTOYROW_NEON
+#define HAS_BGRATOUVROW_NEON
+#define HAS_BGRATOYROW_NEON
+#define HAS_BYTETOFLOATROW_NEON
+#define HAS_COPYROW_NEON
+#define HAS_HALFFLOATROW_NEON
+#define HAS_I400TOARGBROW_NEON
+#define HAS_I422ALPHATOARGBROW_NEON
+#define HAS_I422TOARGB1555ROW_NEON
+#define HAS_I422TOARGB4444ROW_NEON
+#define HAS_I422TOARGBROW_NEON
+#define HAS_I422TORGB24ROW_NEON
+#define HAS_I422TORGB565ROW_NEON
+#define HAS_I422TORGBAROW_NEON
+#define HAS_I422TOUYVYROW_NEON
+#define HAS_I422TOYUY2ROW_NEON
+#define HAS_I444TOARGBROW_NEON
+#define HAS_J400TOARGBROW_NEON
+#define HAS_MERGEUVROW_NEON
+#define HAS_MIRRORROW_NEON
+#define HAS_MIRRORUVROW_NEON
+#define HAS_NV12TOARGBROW_NEON
+#define HAS_NV12TORGB24ROW_NEON
+#define HAS_NV12TORGB565ROW_NEON
+#define HAS_NV21TOARGBROW_NEON
+#define HAS_NV21TORGB24ROW_NEON
+#define HAS_RAWTOARGBROW_NEON
+#define HAS_RAWTORGB24ROW_NEON
+#define HAS_RAWTOUVROW_NEON
+#define HAS_RAWTOYROW_NEON
+#define HAS_RGB24TOARGBROW_NEON
+#define HAS_RGB24TOUVROW_NEON
+#define HAS_RGB24TOYROW_NEON
+#define HAS_RGB565TOARGBROW_NEON
+#define HAS_RGB565TOUVROW_NEON
+#define HAS_RGB565TOYROW_NEON
+#define HAS_RGBATOUVROW_NEON
+#define HAS_RGBATOYROW_NEON
+#define HAS_SETROW_NEON
+#define HAS_SPLITRGBROW_NEON
+#define HAS_SPLITUVROW_NEON
+#define HAS_UYVYTOARGBROW_NEON
+#define HAS_UYVYTOUV422ROW_NEON
+#define HAS_UYVYTOUVROW_NEON
+#define HAS_UYVYTOYROW_NEON
+#define HAS_YUY2TOARGBROW_NEON
+#define HAS_YUY2TOUV422ROW_NEON
+#define HAS_YUY2TOUVROW_NEON
+#define HAS_YUY2TOYROW_NEON
+
+// Effects:
+#define HAS_ARGBADDROW_NEON
+#define HAS_ARGBATTENUATEROW_NEON
+#define HAS_ARGBBLENDROW_NEON
+#define HAS_ARGBCOLORMATRIXROW_NEON
+#define HAS_ARGBGRAYROW_NEON
+#define HAS_ARGBMIRRORROW_NEON
+#define HAS_ARGBMULTIPLYROW_NEON
+#define HAS_ARGBQUANTIZEROW_NEON
+#define HAS_ARGBSEPIAROW_NEON
+#define HAS_ARGBSHADEROW_NEON
+#define HAS_ARGBSHUFFLEROW_NEON
+#define HAS_ARGBSUBTRACTROW_NEON
+#define HAS_INTERPOLATEROW_NEON
+#define HAS_SOBELROW_NEON
+#define HAS_SOBELTOPLANEROW_NEON
+#define HAS_SOBELXROW_NEON
+#define HAS_SOBELXYROW_NEON
+#define HAS_SOBELYROW_NEON
+#endif
+
+// The following are available on AArch64 platforms:
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#define HAS_SCALESUMSAMPLES_NEON
+#endif
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#define HAS_ABGRTOUVROW_MSA
+#define HAS_ABGRTOYROW_MSA
+#define HAS_ARGB1555TOARGBROW_MSA
+#define HAS_ARGB1555TOUVROW_MSA
+#define HAS_ARGB1555TOYROW_MSA
+#define HAS_ARGB4444TOARGBROW_MSA
+#define HAS_ARGBADDROW_MSA
+#define HAS_ARGBATTENUATEROW_MSA
+#define HAS_ARGBBLENDROW_MSA
+#define HAS_ARGBCOLORMATRIXROW_MSA
+#define HAS_ARGBEXTRACTALPHAROW_MSA
+#define HAS_ARGBGRAYROW_MSA
+#define HAS_ARGBMIRRORROW_MSA
+#define HAS_ARGBMULTIPLYROW_MSA
+#define HAS_ARGBQUANTIZEROW_MSA
+#define HAS_ARGBSEPIAROW_MSA
+#define HAS_ARGBSETROW_MSA
+#define HAS_ARGBSHADEROW_MSA
+#define HAS_ARGBSHUFFLEROW_MSA
+#define HAS_ARGBSUBTRACTROW_MSA
+#define HAS_ARGBTOARGB1555ROW_MSA
+#define HAS_ARGBTOARGB4444ROW_MSA
+#define HAS_ARGBTORAWROW_MSA
+#define HAS_ARGBTORGB24ROW_MSA
+#define HAS_ARGBTORGB565DITHERROW_MSA
+#define HAS_ARGBTORGB565ROW_MSA
+#define HAS_ARGBTOUV444ROW_MSA
+#define HAS_ARGBTOUVJROW_MSA
+#define HAS_ARGBTOUVROW_MSA
+#define HAS_ARGBTOYJROW_MSA
+#define HAS_ARGBTOYROW_MSA
+#define HAS_BGRATOUVROW_MSA
+#define HAS_BGRATOYROW_MSA
+#define HAS_HALFFLOATROW_MSA
+#define HAS_I400TOARGBROW_MSA
+#define HAS_I422ALPHATOARGBROW_MSA
+#define HAS_I422TOARGBROW_MSA
+#define HAS_I422TORGB24ROW_MSA
+#define HAS_I422TORGBAROW_MSA
+#define HAS_I422TOUYVYROW_MSA
+#define HAS_I422TOYUY2ROW_MSA
+#define HAS_I444TOARGBROW_MSA
+#define HAS_INTERPOLATEROW_MSA
+#define HAS_J400TOARGBROW_MSA
+#define HAS_MERGEUVROW_MSA
+#define HAS_MIRRORROW_MSA
+#define HAS_MIRRORUVROW_MSA
+#define HAS_NV12TOARGBROW_MSA
+#define HAS_NV12TORGB565ROW_MSA
+#define HAS_NV21TOARGBROW_MSA
+#define HAS_RAWTOARGBROW_MSA
+#define HAS_RAWTORGB24ROW_MSA
+#define HAS_RAWTOUVROW_MSA
+#define HAS_RAWTOYROW_MSA
+#define HAS_RGB24TOARGBROW_MSA
+#define HAS_RGB24TOUVROW_MSA
+#define HAS_RGB24TOYROW_MSA
+#define HAS_RGB565TOARGBROW_MSA
+#define HAS_RGB565TOUVROW_MSA
+#define HAS_RGB565TOYROW_MSA
+#define HAS_RGBATOUVROW_MSA
+#define HAS_RGBATOYROW_MSA
+#define HAS_SETROW_MSA
+#define HAS_SOBELROW_MSA
+#define HAS_SOBELTOPLANEROW_MSA
+#define HAS_SOBELXROW_MSA
+#define HAS_SOBELXYROW_MSA
+#define HAS_SOBELYROW_MSA
+#define HAS_SPLITUVROW_MSA
+#define HAS_UYVYTOARGBROW_MSA
+#define HAS_UYVYTOUVROW_MSA
+#define HAS_UYVYTOYROW_MSA
+#define HAS_YUY2TOARGBROW_MSA
+#define HAS_YUY2TOUV422ROW_MSA
+#define HAS_YUY2TOUVROW_MSA
+#define HAS_YUY2TOYROW_MSA
+#endif
+
+#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
+#if defined(VISUALC_HAS_AVX2)
+#define SIMD_ALIGNED(var) __declspec(align(32)) var
+#else
+#define SIMD_ALIGNED(var) __declspec(align(16)) var
+#endif
+typedef __declspec(align(16)) int16_t vec16[8];
+typedef __declspec(align(16)) int32_t vec32[4];
+typedef __declspec(align(16)) int8_t vec8[16];
+typedef __declspec(align(16)) uint16_t uvec16[8];
+typedef __declspec(align(16)) uint32_t uvec32[4];
+typedef __declspec(align(16)) uint8_t uvec8[16];
+typedef __declspec(align(32)) int16_t lvec16[16];
+typedef __declspec(align(32)) int32_t lvec32[8];
+typedef __declspec(align(32)) int8_t lvec8[32];
+typedef __declspec(align(32)) uint16_t ulvec16[16];
+typedef __declspec(align(32)) uint32_t ulvec32[8];
+typedef __declspec(align(32)) uint8_t ulvec8[32];
+#elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__))
+// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
+#if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)
+#define SIMD_ALIGNED(var) var __attribute__((aligned(32)))
+#else
+#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
+#endif
+typedef int16_t __attribute__((vector_size(16))) vec16;
+typedef int32_t __attribute__((vector_size(16))) vec32;
+typedef int8_t __attribute__((vector_size(16))) vec8;
+typedef uint16_t __attribute__((vector_size(16))) uvec16;
+typedef uint32_t __attribute__((vector_size(16))) uvec32;
+typedef uint8_t __attribute__((vector_size(16))) uvec8;
+typedef int16_t __attribute__((vector_size(32))) lvec16;
+typedef int32_t __attribute__((vector_size(32))) lvec32;
+typedef int8_t __attribute__((vector_size(32))) lvec8;
+typedef uint16_t __attribute__((vector_size(32))) ulvec16;
+typedef uint32_t __attribute__((vector_size(32))) ulvec32;
+typedef uint8_t __attribute__((vector_size(32))) ulvec8;
+#else
+#define SIMD_ALIGNED(var) var
+typedef int16_t vec16[8];
+typedef int32_t vec32[4];
+typedef int8_t vec8[16];
+typedef uint16_t uvec16[8];
+typedef uint32_t uvec32[4];
+typedef uint8_t uvec8[16];
+typedef int16_t lvec16[16];
+typedef int32_t lvec32[8];
+typedef int8_t lvec8[32];
+typedef uint16_t ulvec16[16];
+typedef uint32_t ulvec32[8];
+typedef uint8_t ulvec8[32];
+#endif
+
+#if defined(__aarch64__)
+// This struct is for Arm64 color conversion.
+struct YuvConstants {
+ uvec16 kUVToRB;
+ uvec16 kUVToRB2;
+ uvec16 kUVToG;
+ uvec16 kUVToG2;
+ vec16 kUVBiasBGR;
+ vec32 kYToRgb;
+};
+#elif defined(__arm__)
+// This struct is for ArmV7 color conversion.
+struct YuvConstants {
+ uvec8 kUVToRB;
+ uvec8 kUVToG;
+ vec16 kUVBiasBGR;
+ vec32 kYToRgb;
+};
+#else
+// This struct is for Intel color conversion.
+struct YuvConstants {
+ int8_t kUVToB[32];
+ int8_t kUVToG[32];
+ int8_t kUVToR[32];
+ int16_t kUVBiasB[16];
+ int16_t kUVBiasG[16];
+ int16_t kUVBiasR[16];
+ int16_t kYToRgb[16];
+};
+
+// Offsets into YuvConstants structure
+#define KUVTOB 0
+#define KUVTOG 32
+#define KUVTOR 64
+#define KUVBIASB 96
+#define KUVBIASG 128
+#define KUVBIASR 160
+#define KYTORGB 192
+#endif
+
+// Conversion matrix for YUV to RGB
+extern const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants); // BT.601
+extern const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants); // JPeg
+extern const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants); // BT.709
+
+// Conversion matrix for YVU to BGR
+extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601
+extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPeg
+extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
+
+#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
+
+#define align_buffer_64(var, size) \
+ uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \
+ uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
+
+#define free_aligned_buffer_64(var) \
+ free(var##_mem); \
+ var = 0
+
+#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
+#define OMITFP
+#else
+#define OMITFP __attribute__((optimize("omit-frame-pointer")))
+#endif
+
+// NaCL macros for GCC x86 and x64.
+#if defined(__native_client__)
+#define LABELALIGN ".p2align 5\n"
+#else
+#define LABELALIGN
+#endif
+
+// Intel Code Analizer markers. Insert IACA_START IACA_END around code to be
+// measured and then run with iaca -64 libyuv_unittest.
+// IACA_ASM_START amd IACA_ASM_END are equivalents that can be used within
+// inline assembly blocks.
+// example of iaca:
+// ~/iaca-lin64/bin/iaca.sh -64 -analysis LATENCY out/Release/libyuv_unittest
+
+#if defined(__x86_64__) || defined(__i386__)
+
+#define IACA_ASM_START \
+ ".byte 0x0F, 0x0B\n" \
+ " movl $111, %%ebx\n" \
+ ".byte 0x64, 0x67, 0x90\n"
+
+#define IACA_ASM_END \
+ " movl $222, %%ebx\n" \
+ ".byte 0x64, 0x67, 0x90\n" \
+ ".byte 0x0F, 0x0B\n"
+
+#define IACA_SSC_MARK(MARK_ID) \
+ __asm__ __volatile__("\n\t movl $" #MARK_ID \
+ ", %%ebx" \
+ "\n\t .byte 0x64, 0x67, 0x90" \
+ : \
+ : \
+ : "memory");
+
+#define IACA_UD_BYTES __asm__ __volatile__("\n\t .byte 0x0F, 0x0B");
+
+#else /* Visual C */
+#define IACA_UD_BYTES \
+ { __asm _emit 0x0F __asm _emit 0x0B }
+
+#define IACA_SSC_MARK(x) \
+ { __asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90 }
+
+#define IACA_VC64_START __writegsbyte(111, 111);
+#define IACA_VC64_END __writegsbyte(222, 222);
+#endif
+
+#define IACA_START \
+ { \
+ IACA_UD_BYTES \
+ IACA_SSC_MARK(111) \
+ }
+#define IACA_END \
+ { \
+ IACA_SSC_MARK(222) \
+ IACA_UD_BYTES \
+ }
+
+void I444ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB1555Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB4444Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+
+void I422ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB565Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB4444Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB1555Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB565Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_MSA(const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+
+void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width);
+void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width);
+void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
+void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVRow_NEON(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUV444Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVRow_MSA(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVJRow_NEON(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void BGRAToUVRow_NEON(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ABGRToUVRow_NEON(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGBAToUVRow_NEON(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RAWToUVRow_NEON(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RAWToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGB565ToUVRow_MSA(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width);
+void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
+void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
+void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
+void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_y,
+ int width);
+void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_y,
+ int width);
+void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
+void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
+void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ABGRToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGBAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGB24ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RAWToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
+void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
+void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
+void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYRow_Any_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
+void RAWToYRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGB4444ToYRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void BGRAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
+ int src_stride_bgra,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
+ int src_stride_rgba,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUV444Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RAWToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVJRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void BGRAToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ABGRToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGBAToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGB24ToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RAWToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGB565ToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVJRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUVJRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void BGRAToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ABGRToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGBAToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGB24ToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RAWToUVRow_C(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void RGB565ToUVRow_C(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+
+void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void ARGBToUV444Row_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+
+void ARGBToUV444Row_C(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+
+void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void MirrorUVRow_SSSE3(const uint8_t* src,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void MirrorUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void MirrorUVRow_MSA(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void MirrorUVRow_C(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+
+void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBMirrorRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void SplitUVRow_C(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void SplitUVRow_SSE2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void SplitUVRow_AVX2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void SplitUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void SplitUVRow_MSA(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void SplitUVRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void SplitUVRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void SplitUVRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void SplitUVRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+
+void MergeUVRow_C(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width);
+void MergeUVRow_SSE2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width);
+void MergeUVRow_AVX2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width);
+void MergeUVRow_NEON(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width);
+void MergeUVRow_MSA(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width);
+void MergeUVRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void MergeUVRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void MergeUVRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void MergeUVRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+
+void SplitRGBRow_C(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width);
+void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width);
+void SplitRGBRow_NEON(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width);
+void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width);
+void SplitRGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width);
+
+void MergeRGBRow_C(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width);
+void MergeRGBRow_SSSE3(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width);
+void MergeRGBRow_NEON(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width);
+void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void MergeRGBRow_Any_NEON(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width);
+
+void MergeUVRow_16_C(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
+ int scale, /* 64 for 10 bit */
+ int width);
+void MergeUVRow_16_AVX2(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
+ int scale,
+ int width);
+
+void MultiplyRow_16_AVX2(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void MultiplyRow_16_C(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+
+void Convert8To16Row_C(const uint8_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void Convert8To16Row_SSE2(const uint8_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void Convert8To16Row_AVX2(const uint8_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width);
+void Convert8To16Row_Any_SSE2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int scale,
+ int width);
+void Convert8To16Row_Any_AVX2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int scale,
+ int width);
+
+void Convert16To8Row_C(const uint16_t* src_y,
+ uint8_t* dst_y,
+ int scale,
+ int width);
+void Convert16To8Row_SSSE3(const uint16_t* src_y,
+ uint8_t* dst_y,
+ int scale,
+ int width);
+void Convert16To8Row_AVX2(const uint16_t* src_y,
+ uint8_t* dst_y,
+ int scale,
+ int width);
+void Convert16To8Row_Any_SSSE3(const uint16_t* src_ptr,
+ uint8_t* dst_ptr,
+ int scale,
+ int width);
+void Convert16To8Row_Any_AVX2(const uint16_t* src_ptr,
+ uint8_t* dst_ptr,
+ int scale,
+ int width);
+
+void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count);
+void CopyRow_C(const uint8_t* src, uint8_t* dst, int count);
+void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void CopyRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count);
+
+void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width);
+void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width);
+void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width);
+void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width);
+void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width);
+void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBExtractAlphaRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void SetRow_C(uint8_t* dst, uint8_t v8, int width);
+void SetRow_MSA(uint8_t* dst, uint8_t v8, int width);
+void SetRow_X86(uint8_t* dst, uint8_t v8, int width);
+void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width);
+void SetRow_NEON(uint8_t* dst, uint8_t v8, int width);
+void SetRow_Any_X86(uint8_t* dst_ptr, uint8_t v32, int width);
+void SetRow_Any_NEON(uint8_t* dst_ptr, uint8_t v32, int width);
+
+void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width);
+void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width);
+void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width);
+void ARGBSetRow_Any_NEON(uint8_t* dst_ptr, uint32_t v32, int width);
+void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width);
+void ARGBSetRow_Any_MSA(uint8_t* dst_ptr, uint32_t v32, int width);
+
+// ARGBShufflers for BGRAToARGB etc.
+void ARGBShuffleRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width);
+void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width);
+void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width);
+void ARGBShuffleRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width);
+void ARGBShuffleRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width);
+void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint8_t* param,
+ int width);
+void ARGBShuffleRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint8_t* param,
+ int width);
+void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint8_t* param,
+ int width);
+void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint8_t* param,
+ int width);
+
+void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width);
+void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width);
+void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width);
+void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width);
+
+void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width);
+void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
+void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width);
+void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width);
+void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width);
+void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width);
+void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width);
+void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width);
+void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
+void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
+void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width);
+void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width);
+void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width);
+void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width);
+void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
+void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width);
+
+void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGB1555ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGB4444ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void RGB565ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGB1555ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGB4444ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+
+void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
+
+void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width);
+
+void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
+ int width);
+void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
+ const uint32_t dither4,
+ int width);
+void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
+ const uint32_t dither4,
+ int width);
+
+void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
+void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
+void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
+
+void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb24,
+ int width);
+void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width);
+void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb565,
+ int width);
+void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb1555,
+ int width);
+void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb4444,
+ int width);
+void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
+ int width);
+void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
+void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
+void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
+ int width);
+
+void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
+void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
+
+void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void I444ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToAR30Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToAR30Row_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToARGBRow_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB565Row_C(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_C(const uint8_t* src_uyvy,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB4444Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB1555Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB565Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+
+void I422ToAR30Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToAR30Row_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToARGBRow_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToAR30Row_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToARGBRow_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToAR30Row_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB565Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToAR30Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToAR30Row_Any_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToAR30Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToARGBRow_Any_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB565Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void YUY2ToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void YUY2ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB4444Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB4444Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB1555Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB1555Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB565Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+
+void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width);
+void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
+void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
+void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+// ARGB preattenuated alpha blend.
+void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBBlendRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBBlendRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBBlendRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+
+// Unattenuated planar alpha blend.
+void BlendPlaneRow_SSSE3(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
+ int width);
+void BlendPlaneRow_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void BlendPlaneRow_AVX2(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
+ int width);
+void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void BlendPlaneRow_C(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
+ int width);
+
+// ARGB multiply images. Same API as Blend, but these require
+// pointer and width alignment for SSE2.
+void ARGBMultiplyRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBMultiplyRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBMultiplyRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBMultiplyRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+
+// ARGB add images.
+void ARGBAddRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAddRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAddRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBAddRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAddRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBAddRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAddRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBAddRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAddRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+
+// ARGB subtract images. Same API as Blend, but these require
+// pointer and width alignment for SSE2.
+void ARGBSubtractRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBSubtractRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBSubtractRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBSubtractRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+
+void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRAWRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToRGB24Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRGB24Row_Any_AVX512VBMI(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint32_t param,
+ int width);
+void ARGBToRGB565DitherRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint32_t param,
+ int width);
+
+void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ABGRToAR30Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToAR30Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void ARGBToRGB24Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRAWRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToRGB565Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB1555Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB4444Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRGB565DitherRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint32_t param,
+ int width);
+void ARGBToRGB24Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRAWRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToRGB565Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToARGB4444Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint32_t param,
+ int width);
+
+void I444ToARGBRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB4444Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB1555Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB565Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToARGBRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB24Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void YUY2ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToARGBRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToARGBRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB565Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void YUY2ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void UYVYToARGBRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
+
+void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToUVRow_C(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
+ int stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_SSE2(const uint8_t* src_uyvy,
+ int stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
+ int stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
+ int stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_MSA(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+
+void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToUVRow_C(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToUV422Row_C(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToUV422Row_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToUV422Row_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
+
+void I422ToYUY2Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
+ int width);
+void I422ToUYVYRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
+ int width);
+void I422ToYUY2Row_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width);
+void I422ToUYVYRow_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width);
+void I422ToYUY2Row_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToUYVYRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToYUY2Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width);
+void I422ToUYVYRow_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width);
+void I422ToYUY2Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToUYVYRow_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToYUY2Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width);
+void I422ToUYVYRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width);
+void I422ToYUY2Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToUYVYRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToYUY2Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width);
+void I422ToUYVYRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width);
+void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
+
+// Effects related row functions.
+void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBAttenuateRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+// Inverse table for unattenuate, shared by C and SSE2.
+extern const uint32_t fixed_invtbl8[256];
+void ARGBUnattenuateRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
+void ARGBUnattenuateRow_Any_SSE2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+void ARGBUnattenuateRow_Any_AVX2(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
+
+void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+
+void ARGBSepiaRow_C(uint8_t* dst_argb, int width);
+void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width);
+void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width);
+void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width);
+
+void ARGBColorMatrixRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
+ int width);
+void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
+ int width);
+void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
+ int width);
+void ARGBColorMatrixRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
+ int width);
+
+void ARGBColorTableRow_C(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width);
+void ARGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width);
+
+void RGBColorTableRow_C(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width);
+void RGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width);
+
+void ARGBQuantizeRow_C(uint8_t* dst_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int width);
+void ARGBQuantizeRow_SSE2(uint8_t* dst_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int width);
+void ARGBQuantizeRow_NEON(uint8_t* dst_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int width);
+void ARGBQuantizeRow_MSA(uint8_t* dst_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int width);
+
+void ARGBShadeRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value);
+void ARGBShadeRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value);
+void ARGBShadeRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value);
+void ARGBShadeRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value);
+
+// Used for blur.
+void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
+ const int32_t* botleft,
+ int width,
+ int area,
+ uint8_t* dst,
+ int count);
+void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
+ int32_t* cumsum,
+ const int32_t* previous_cumsum,
+ int width);
+
+void CumulativeSumToAverageRow_C(const int32_t* tl,
+ const int32_t* bl,
+ int w,
+ int area,
+ uint8_t* dst,
+ int count);
+void ComputeCumulativeSumRow_C(const uint8_t* row,
+ int32_t* cumsum,
+ const int32_t* previous_cumsum,
+ int width);
+
+LIBYUV_API
+void ARGBAffineRow_C(const uint8_t* src_argb,
+ int src_argb_stride,
+ uint8_t* dst_argb,
+ const float* uv_dudv,
+ int width);
+LIBYUV_API
+void ARGBAffineRow_SSE2(const uint8_t* src_argb,
+ int src_argb_stride,
+ uint8_t* dst_argb,
+ const float* src_dudv,
+ int width);
+
+// Used for I420Scale, ARGBScale, and ARGBInterpolate.
+void InterpolateRow_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int width,
+ int source_y_fraction);
+void InterpolateRow_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction);
+void InterpolateRow_AVX2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction);
+void InterpolateRow_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction);
+void InterpolateRow_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int width,
+ int source_y_fraction);
+void InterpolateRow_Any_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride_ptr,
+ int width,
+ int source_y_fraction);
+void InterpolateRow_Any_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride_ptr,
+ int width,
+ int source_y_fraction);
+void InterpolateRow_Any_AVX2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride_ptr,
+ int width,
+ int source_y_fraction);
+void InterpolateRow_Any_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride_ptr,
+ int width,
+ int source_y_fraction);
+
+void InterpolateRow_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ int width,
+ int source_y_fraction);
+
+// Sobel images.
+void SobelXRow_C(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int width);
+void SobelXRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int width);
+void SobelXRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int width);
+void SobelXRow_MSA(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int width);
+void SobelYRow_C(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int width);
+void SobelYRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int width);
+void SobelYRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int width);
+void SobelYRow_MSA(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int width);
+void SobelRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width);
+void SobelRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width);
+void SobelRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width);
+void SobelRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width);
+void SobelToPlaneRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
+ int width);
+void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
+ int width);
+void SobelToPlaneRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
+ int width);
+void SobelToPlaneRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
+ int width);
+void SobelXYRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width);
+void SobelXYRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width);
+void SobelXYRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width);
+void SobelXYRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width);
+void SobelRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void SobelRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void SobelRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void SobelToPlaneRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void SobelToPlaneRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void SobelXYRow_Any_SSE2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void SobelXYRow_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+void SobelXYRow_Any_MSA(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
+
+void ARGBPolynomialRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const float* poly,
+ int width);
+void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const float* poly,
+ int width);
+void ARGBPolynomialRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const float* poly,
+ int width);
+
+// Scale and convert to half float.
+void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width);
+void HalfFloatRow_SSE2(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_Any_SSE2(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ float param,
+ int width);
+void HalfFloatRow_AVX2(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_Any_AVX2(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ float param,
+ int width);
+void HalfFloatRow_F16C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_Any_F16C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloat1Row_F16C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloat1Row_Any_F16C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_Any_NEON(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ float param,
+ int width);
+void HalfFloat1Row_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloat1Row_Any_NEON(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ float param,
+ int width);
+void HalfFloatRow_MSA(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width);
+void HalfFloatRow_Any_MSA(const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ float param,
+ int width);
+void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width);
+void ByteToFloatRow_NEON(const uint8_t* src,
+ float* dst,
+ float scale,
+ int width);
+void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr,
+ float* dst_ptr,
+ float param,
+ int width);
+
+void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ const uint8_t* luma,
+ uint32_t lumacoeff);
+void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ const uint8_t* luma,
+ uint32_t lumacoeff);
+
+float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width);
+float ScaleMaxSamples_NEON(const float* src,
+ float* dst,
+ float scale,
+ int width);
+float ScaleSumSamples_C(const float* src, float* dst, float scale, int width);
+float ScaleSumSamples_NEON(const float* src,
+ float* dst,
+ float scale,
+ int width);
+void ScaleSamples_C(const float* src, float* dst, float scale, int width);
+void ScaleSamples_NEON(const float* src, float* dst, float scale, int width);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_ROW_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale.h
new file mode 100644
index 0000000000..b937d348ca
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_SCALE_H_
+#define INCLUDE_LIBYUV_SCALE_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Supported filtering.
+typedef enum FilterMode {
+ kFilterNone = 0, // Point sample; Fastest.
+ kFilterLinear = 1, // Filter horizontally only.
+ kFilterBilinear = 2, // Faster than box, but lower quality scaling down.
+ kFilterBox = 3 // Highest quality.
+} FilterModeEnum;
+
+// Scale a YUV plane.
+LIBYUV_API
+void ScalePlane(const uint8_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint8_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
+
+LIBYUV_API
+void ScalePlane_16(const uint16_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint16_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
+
+// Scales a YUV 4:2:0 image from the src width and height to the
+// dst width and height.
+// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
+// used. This produces basic (blocky) quality at the fastest speed.
+// If filtering is kFilterBilinear, interpolation is used to produce a better
+// quality image, at the expense of speed.
+// If filtering is kFilterBox, averaging is used to produce ever better
+// quality image, at further expense of speed.
+// Returns 0 if successful.
+
+LIBYUV_API
+int I420Scale(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ int src_width,
+ int src_height,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
+
+LIBYUV_API
+int I420Scale_16(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ int src_width,
+ int src_height,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
+
+#ifdef __cplusplus
+// Legacy API. Deprecated.
+LIBYUV_API
+int Scale(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ int src_stride_y,
+ int src_stride_u,
+ int src_stride_v,
+ int src_width,
+ int src_height,
+ uint8_t* dst_y,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int dst_stride_y,
+ int dst_stride_u,
+ int dst_stride_v,
+ int dst_width,
+ int dst_height,
+ LIBYUV_BOOL interpolate);
+
+// For testing, allow disabling of specialized scalers.
+LIBYUV_API
+void SetUseReferenceImpl(LIBYUV_BOOL use);
+#endif // __cplusplus
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_SCALE_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_argb.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_argb.h
new file mode 100644
index 0000000000..7641f18e34
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_argb.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_
+#define INCLUDE_LIBYUV_SCALE_ARGB_H_
+
+#include "libyuv/basic_types.h"
+#include "libyuv/scale.h" // For FilterMode
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+LIBYUV_API
+int ARGBScale(const uint8_t* src_argb,
+ int src_stride_argb,
+ int src_width,
+ int src_height,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
+
+// Clipped scale takes destination rectangle coordinates for clip values.
+LIBYUV_API
+int ARGBScaleClip(const uint8_t* src_argb,
+ int src_stride_argb,
+ int src_width,
+ int src_height,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int dst_width,
+ int dst_height,
+ int clip_x,
+ int clip_y,
+ int clip_width,
+ int clip_height,
+ enum FilterMode filtering);
+
+// Scale with YUV conversion to ARGB and clipping.
+LIBYUV_API
+int YUVToARGBScaleClip(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint32_t src_fourcc,
+ int src_width,
+ int src_height,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ uint32_t dst_fourcc,
+ int dst_width,
+ int dst_height,
+ int clip_x,
+ int clip_y,
+ int clip_width,
+ int clip_height,
+ enum FilterMode filtering);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_row.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_row.h
new file mode 100644
index 0000000000..7194ba09f8
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_row.h
@@ -0,0 +1,944 @@
+/*
+ * Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_
+#define INCLUDE_LIBYUV_SCALE_ROW_H_
+
+#include "libyuv/basic_types.h"
+#include "libyuv/scale.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if defined(__pnacl__) || defined(__CLR_VER) || \
+ (defined(__native_client__) && defined(__x86_64__)) || \
+ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
+#define LIBYUV_DISABLE_X86
+#endif
+#if defined(__native_client__)
+#define LIBYUV_DISABLE_NEON
+#endif
+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define LIBYUV_DISABLE_X86
+#endif
+#endif
+// GCC >= 4.7.0 required for AVX2.
+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
+#define GCC_HAS_AVX2 1
+#endif // GNUC >= 4.7
+#endif // __GNUC__
+
+// clang >= 3.4.0 required for AVX2.
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
+#define CLANG_HAS_AVX2 1
+#endif // clang >= 3.4
+#endif // __clang__
+
+// Visual C 2012 required for AVX2.
+#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
+ _MSC_VER >= 1700
+#define VISUALC_HAS_AVX2 1
+#endif // VisualStudio >= 2012
+
+// The following are available on all x86 platforms:
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+#define HAS_FIXEDDIV1_X86
+#define HAS_FIXEDDIV_X86
+#define HAS_SCALEARGBCOLS_SSE2
+#define HAS_SCALEARGBCOLSUP2_SSE2
+#define HAS_SCALEARGBFILTERCOLS_SSSE3
+#define HAS_SCALEARGBROWDOWN2_SSE2
+#define HAS_SCALEARGBROWDOWNEVEN_SSE2
+#define HAS_SCALECOLSUP2_SSE2
+#define HAS_SCALEFILTERCOLS_SSSE3
+#define HAS_SCALEROWDOWN2_SSSE3
+#define HAS_SCALEROWDOWN34_SSSE3
+#define HAS_SCALEROWDOWN38_SSSE3
+#define HAS_SCALEROWDOWN4_SSSE3
+#define HAS_SCALEADDROW_SSE2
+#endif
+
+// The following are available on all x86 platforms, but
+// require VS2012, clang 3.4 or gcc 4.7.
+// The code supports NaCL but requires a new compiler and validator.
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
+ defined(GCC_HAS_AVX2))
+#define HAS_SCALEADDROW_AVX2
+#define HAS_SCALEROWDOWN2_AVX2
+#define HAS_SCALEROWDOWN4_AVX2
+#endif
+
+// The following are available on Neon platforms:
+#if !defined(LIBYUV_DISABLE_NEON) && \
+ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
+#define HAS_SCALEARGBCOLS_NEON
+#define HAS_SCALEARGBROWDOWN2_NEON
+#define HAS_SCALEARGBROWDOWNEVEN_NEON
+#define HAS_SCALEFILTERCOLS_NEON
+#define HAS_SCALEROWDOWN2_NEON
+#define HAS_SCALEROWDOWN34_NEON
+#define HAS_SCALEROWDOWN38_NEON
+#define HAS_SCALEROWDOWN4_NEON
+#define HAS_SCALEARGBFILTERCOLS_NEON
+#endif
+
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#define HAS_SCALEADDROW_MSA
+#define HAS_SCALEARGBCOLS_MSA
+#define HAS_SCALEARGBFILTERCOLS_MSA
+#define HAS_SCALEARGBROWDOWN2_MSA
+#define HAS_SCALEARGBROWDOWNEVEN_MSA
+#define HAS_SCALEFILTERCOLS_MSA
+#define HAS_SCALEROWDOWN2_MSA
+#define HAS_SCALEROWDOWN34_MSA
+#define HAS_SCALEROWDOWN38_MSA
+#define HAS_SCALEROWDOWN4_MSA
+#endif
+
+// Scale ARGB vertically with bilinear interpolation.
+void ScalePlaneVertical(int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int y,
+ int dy,
+ int bpp,
+ enum FilterMode filtering);
+
+void ScalePlaneVertical_16(int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_argb,
+ uint16_t* dst_argb,
+ int x,
+ int y,
+ int dy,
+ int wpp,
+ enum FilterMode filtering);
+
+// Simplify the filtering based on scale factors.
+enum FilterMode ScaleFilterReduce(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
+
+// Divide num by div and return as 16.16 fixed point result.
+int FixedDiv_C(int num, int div);
+int FixedDiv_X86(int num, int div);
+// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
+int FixedDiv1_C(int num, int div);
+int FixedDiv1_X86(int num, int div);
+#ifdef HAS_FIXEDDIV_X86
+#define FixedDiv FixedDiv_X86
+#define FixedDiv1 FixedDiv1_X86
+#else
+#define FixedDiv FixedDiv_C
+#define FixedDiv1 FixedDiv1_C
+#endif
+
+// Compute slope values for stepping.
+void ScaleSlope(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering,
+ int* x,
+ int* y,
+ int* dx,
+ int* dy);
+
+void ScaleRowDown2_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown2_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width);
+void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width);
+void ScaleRowDown2Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width);
+void ScaleRowDown4_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown4_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width);
+void ScaleRowDown4Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width);
+void ScaleRowDown34_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown34_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width);
+void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* d,
+ int dst_width);
+void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* d,
+ int dst_width);
+void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* d,
+ int dst_width);
+void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* d,
+ int dst_width);
+void ScaleCols_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleCols_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleColsUp2_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int,
+ int);
+void ScaleColsUp2_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ int dst_width,
+ int,
+ int);
+void ScaleFilterCols_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleFilterCols_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleFilterCols64_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x32,
+ int dx);
+void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ int dst_width,
+ int x32,
+ int dx);
+void ScaleRowDown38_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown38_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width);
+void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ int dst_width);
+void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleAddRow_16_C(const uint16_t* src_ptr,
+ uint32_t* dst_ptr,
+ int src_width);
+void ScaleARGBRowDown2_C(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBCols_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBCols64_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x32,
+ int dx);
+void ScaleARGBColsUp2_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int,
+ int);
+void ScaleARGBFilterCols_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x32,
+ int dx);
+
+// Specialized scalers for x86.
+void ScaleRowDown2_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Linear_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Box_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Box_Odd_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2_Any_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Linear_Any_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Box_Any_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Box_Odd_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4Box_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4_Any_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4Box_Any_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+void ScaleRowDown34_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_1_Box_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_0_Box_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_3_Box_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width);
+void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width);
+
+void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleColsUp2_SSE2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+
+// ARGB Column functions
+void ScaleARGBCols_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBFilterCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBFilterCols_Any_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBCols_Any_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBFilterCols_MSA(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBCols_MSA(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBFilterCols_Any_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+
+// ARGB Row functions
+void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2_Any_SSE2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDown2Linear_Any_SSE2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDown2Box_Any_SSE2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDown2_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDown2Linear_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDown2Box_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDown2_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDown2Linear_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int32_t src_stepx,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDownEven_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDownEvenBox_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDownEven_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int32_t src_stepx,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+// ScaleRowDown2Box also used by planar functions
+// NEON downscalers with interpolation.
+
+// Note - not static due to reuse in convert for 444 to 420.
+void ScaleRowDown2_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+
+void ScaleRowDown4_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+// Down scale from 4 to 3 pixels. Use the neon multilane read/write
+// to load up the every 4th pixel into a 4 different registers.
+// Point samples 32 pixels to 24 pixels.
+void ScaleRowDown34_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+// 32 -> 12
+void ScaleRowDown38_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+// 32x3 -> 12x1
+void ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+// 32x2 -> 12x1
+void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+void ScaleRowDown2_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Linear_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Box_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Box_Odd_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4Box_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_0_Box_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_1_Box_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+// 32 -> 12
+void ScaleRowDown38_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+// 32x3 -> 12x1
+void ScaleRowDown38_3_Box_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+// 32x2 -> 12x1
+void ScaleRowDown38_2_Box_Any_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleAddRow_Any_NEON(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width);
+
+void ScaleFilterCols_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+
+void ScaleFilterCols_Any_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+
+void ScaleRowDown2_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown2Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown4_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown4Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown38_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleFilterCols_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleRowDown34_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* d,
+ int dst_width);
+void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* d,
+ int dst_width);
+
+void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4Box_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_2_Box_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_3_Box_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleAddRow_Any_MSA(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width);
+void ScaleFilterCols_Any_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx);
+void ScaleRowDown34_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_0_Box_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_SCALE_ROW_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/version.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/version.h
new file mode 100644
index 0000000000..7022785d8c
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/version.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_VERSION_H_
+#define INCLUDE_LIBYUV_VERSION_H_
+
+#define LIBYUV_VERSION 1711
+
+#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/video_common.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/video_common.h
new file mode 100644
index 0000000000..bcef378b5a
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/video_common.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Common definitions for video, including fourcc and VideoFormat.
+
+#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_
+#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+// Definition of FourCC codes
+//////////////////////////////////////////////////////////////////////////////
+
+// Convert four characters to a FourCC code.
+// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
+// constants are used in a switch.
+#ifdef __cplusplus
+#define FOURCC(a, b, c, d) \
+ ((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \
+ (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24))
+#else
+#define FOURCC(a, b, c, d) \
+ (((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */ \
+ ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */
+#endif
+
+// Some pages discussing FourCC codes:
+// http://www.fourcc.org/yuv.php
+// http://v4l2spec.bytesex.org/spec/book1.htm
+// http://developer.apple.com/quicktime/icefloe/dispatch020.html
+// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
+// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
+
+// FourCC codes grouped according to implementation efficiency.
+// Primary formats should convert in 1 efficient step.
+// Secondary formats are converted in 2 steps.
+// Auxilliary formats call primary converters.
+enum FourCC {
+ // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
+ FOURCC_I420 = FOURCC('I', '4', '2', '0'),
+ FOURCC_I422 = FOURCC('I', '4', '2', '2'),
+ FOURCC_I444 = FOURCC('I', '4', '4', '4'),
+ FOURCC_I400 = FOURCC('I', '4', '0', '0'),
+ FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
+ FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
+ FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
+ FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
+ FOURCC_H010 = FOURCC('H', '0', '1', '0'), // unofficial fourcc. 10 bit lsb
+
+ // 1 Secondary YUV format: row biplanar.
+ FOURCC_M420 = FOURCC('M', '4', '2', '0'),
+
+ // 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
+ FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
+ FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
+ FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
+ FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
+ FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
+ FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
+ FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
+ FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
+ FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
+ FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
+ FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
+
+ // 1 Primary Compressed YUV format.
+ FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
+
+ // 7 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
+ FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
+ FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
+ FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
+ FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
+ FOURCC_J420 = FOURCC('J', '4', '2', '0'),
+ FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
+ FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc
+
+ // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
+ FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
+ FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422.
+ FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444.
+ FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
+ FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac.
+ FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY.
+ FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac.
+ FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG.
+ FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac.
+ FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR.
+ FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW.
+ FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG.
+ FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB
+ FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB
+ FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO.
+ FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
+ FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
+
+ // deprecated formats. Not supported, but defined for backward compatibility.
+ FOURCC_I411 = FOURCC('I', '4', '1', '1'),
+ FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
+ FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
+ FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
+ FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
+ FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
+ FOURCC_H264 = FOURCC('H', '2', '6', '4'),
+
+ // Match any fourcc.
+ FOURCC_ANY = -1,
+};
+
+enum FourCCBpp {
+ // Canonical fourcc codes used in our code.
+ FOURCC_BPP_I420 = 12,
+ FOURCC_BPP_I422 = 16,
+ FOURCC_BPP_I444 = 24,
+ FOURCC_BPP_I411 = 12,
+ FOURCC_BPP_I400 = 8,
+ FOURCC_BPP_NV21 = 12,
+ FOURCC_BPP_NV12 = 12,
+ FOURCC_BPP_YUY2 = 16,
+ FOURCC_BPP_UYVY = 16,
+ FOURCC_BPP_M420 = 12,
+ FOURCC_BPP_Q420 = 12,
+ FOURCC_BPP_ARGB = 32,
+ FOURCC_BPP_BGRA = 32,
+ FOURCC_BPP_ABGR = 32,
+ FOURCC_BPP_RGBA = 32,
+ FOURCC_BPP_AR30 = 32,
+ FOURCC_BPP_AB30 = 32,
+ FOURCC_BPP_24BG = 24,
+ FOURCC_BPP_RAW = 24,
+ FOURCC_BPP_RGBP = 16,
+ FOURCC_BPP_RGBO = 16,
+ FOURCC_BPP_R444 = 16,
+ FOURCC_BPP_RGGB = 8,
+ FOURCC_BPP_BGGR = 8,
+ FOURCC_BPP_GRBG = 8,
+ FOURCC_BPP_GBRG = 8,
+ FOURCC_BPP_YV12 = 12,
+ FOURCC_BPP_YV16 = 16,
+ FOURCC_BPP_YV24 = 24,
+ FOURCC_BPP_YU12 = 12,
+ FOURCC_BPP_J420 = 12,
+ FOURCC_BPP_J400 = 8,
+ FOURCC_BPP_H420 = 12,
+ FOURCC_BPP_H010 = 24,
+ FOURCC_BPP_MJPG = 0, // 0 means unknown.
+ FOURCC_BPP_H264 = 0,
+ FOURCC_BPP_IYUV = 12,
+ FOURCC_BPP_YU16 = 16,
+ FOURCC_BPP_YU24 = 24,
+ FOURCC_BPP_YUYV = 16,
+ FOURCC_BPP_YUVS = 16,
+ FOURCC_BPP_HDYC = 16,
+ FOURCC_BPP_2VUY = 16,
+ FOURCC_BPP_JPEG = 1,
+ FOURCC_BPP_DMB1 = 1,
+ FOURCC_BPP_BA81 = 8,
+ FOURCC_BPP_RGB3 = 24,
+ FOURCC_BPP_BGR3 = 24,
+ FOURCC_BPP_CM32 = 32,
+ FOURCC_BPP_CM24 = 24,
+
+ // Match any fourcc.
+ FOURCC_BPP_ANY = 0, // 0 means unknown.
+};
+
+// Converts fourcc aliases into canonical ones.
+LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc);
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare.cc
new file mode 100644
index 0000000000..50e3abd055
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/compare.cc
@@ -0,0 +1,429 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/compare.h"
+
+#include <float.h>
+#include <math.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#include "libyuv/basic_types.h"
+#include "libyuv/compare_row.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/row.h"
+#include "libyuv/video_common.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// hash seed of 5381 recommended.
+LIBYUV_API
+uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
+ const int kBlockSize = 1 << 15; // 32768;
+ int remainder;
+ uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) =
+ HashDjb2_C;
+#if defined(HAS_HASHDJB2_SSE41)
+ if (TestCpuFlag(kCpuHasSSE41)) {
+ HashDjb2_SSE = HashDjb2_SSE41;
+ }
+#endif
+#if defined(HAS_HASHDJB2_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ HashDjb2_SSE = HashDjb2_AVX2;
+ }
+#endif
+
+ while (count >= (uint64_t)(kBlockSize)) {
+ seed = HashDjb2_SSE(src, kBlockSize, seed);
+ src += kBlockSize;
+ count -= kBlockSize;
+ }
+ remainder = (int)count & ~15;
+ if (remainder) {
+ seed = HashDjb2_SSE(src, remainder, seed);
+ src += remainder;
+ count -= remainder;
+ }
+ remainder = (int)count & 15;
+ if (remainder) {
+ seed = HashDjb2_C(src, remainder, seed);
+ }
+ return seed;
+}
+
+static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
+ return FOURCC_BGRA;
+ }
+ if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
+ return FOURCC_ARGB;
+ }
+ if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255.
+ return FOURCC_BGRA;
+ }
+ if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255.
+ return FOURCC_ARGB;
+ }
+ argb += 8;
+ }
+ if (width & 1) {
+ if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
+ return FOURCC_BGRA;
+ }
+ if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
+ return FOURCC_ARGB;
+ }
+ }
+ return 0;
+}
+
+// Scan an opaque argb image and return fourcc based on alpha offset.
+// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
+LIBYUV_API
+uint32_t ARGBDetect(const uint8_t* argb,
+ int stride_argb,
+ int width,
+ int height) {
+ uint32_t fourcc = 0;
+ int h;
+
+ // Coalesce rows.
+ if (stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ stride_argb = 0;
+ }
+ for (h = 0; h < height && fourcc == 0; ++h) {
+ fourcc = ARGBDetectRow_C(argb, width);
+ argb += stride_argb;
+ }
+ return fourcc;
+}
+
+// NEON version accumulates in 16 bit shorts which overflow at 65536 bytes.
+// So actual maximum is 1 less loop, which is 64436 - 32 bytes.
+
+LIBYUV_API
+uint64_t ComputeHammingDistance(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ const int kBlockSize = 1 << 15; // 32768;
+ const int kSimdSize = 64;
+ // SIMD for multiple of 64, and C for remainder
+ int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1);
+ uint64_t diff = 0;
+ int i;
+ uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b,
+ int count) = HammingDistance_C;
+#if defined(HAS_HAMMINGDISTANCE_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ HammingDistance = HammingDistance_NEON;
+ }
+#endif
+#if defined(HAS_HAMMINGDISTANCE_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ HammingDistance = HammingDistance_SSSE3;
+ }
+#endif
+#if defined(HAS_HAMMINGDISTANCE_SSE42)
+ if (TestCpuFlag(kCpuHasSSE42)) {
+ HammingDistance = HammingDistance_SSE42;
+ }
+#endif
+#if defined(HAS_HAMMINGDISTANCE_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ HammingDistance = HammingDistance_AVX2;
+ }
+#endif
+#if defined(HAS_HAMMINGDISTANCE_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ HammingDistance = HammingDistance_MSA;
+ }
+#endif
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+ : diff)
+#endif
+ for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
+ diff += HammingDistance(src_a + i, src_b + i, kBlockSize);
+ }
+ src_a += count & ~(kBlockSize - 1);
+ src_b += count & ~(kBlockSize - 1);
+ if (remainder) {
+ diff += HammingDistance(src_a, src_b, remainder);
+ src_a += remainder;
+ src_b += remainder;
+ }
+ remainder = count & (kSimdSize - 1);
+ if (remainder) {
+ diff += HammingDistance_C(src_a, src_b, remainder);
+ }
+ return diff;
+}
+
+// TODO(fbarchard): Refactor into row function.
+LIBYUV_API
+uint64_t ComputeSumSquareError(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ // SumSquareError returns values 0 to 65535 for each squared difference.
+ // Up to 65536 of those can be summed and remain within a uint32_t.
+ // After each block of 65536 pixels, accumulate into a uint64_t.
+ const int kBlockSize = 65536;
+ int remainder = count & (kBlockSize - 1) & ~31;
+ uint64_t sse = 0;
+ int i;
+ uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b,
+ int count) = SumSquareError_C;
+#if defined(HAS_SUMSQUAREERROR_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SumSquareError = SumSquareError_NEON;
+ }
+#endif
+#if defined(HAS_SUMSQUAREERROR_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ // Note only used for multiples of 16 so count is not checked.
+ SumSquareError = SumSquareError_SSE2;
+ }
+#endif
+#if defined(HAS_SUMSQUAREERROR_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ // Note only used for multiples of 32 so count is not checked.
+ SumSquareError = SumSquareError_AVX2;
+ }
+#endif
+#if defined(HAS_SUMSQUAREERROR_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ SumSquareError = SumSquareError_MSA;
+ }
+#endif
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+ : sse)
+#endif
+ for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
+ sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
+ }
+ src_a += count & ~(kBlockSize - 1);
+ src_b += count & ~(kBlockSize - 1);
+ if (remainder) {
+ sse += SumSquareError(src_a, src_b, remainder);
+ src_a += remainder;
+ src_b += remainder;
+ }
+ remainder = count & 31;
+ if (remainder) {
+ sse += SumSquareError_C(src_a, src_b, remainder);
+ }
+ return sse;
+}
+
+LIBYUV_API
+uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
+ int stride_a,
+ const uint8_t* src_b,
+ int stride_b,
+ int width,
+ int height) {
+ uint64_t sse = 0;
+ int h;
+ // Coalesce rows.
+ if (stride_a == width && stride_b == width) {
+ width *= height;
+ height = 1;
+ stride_a = stride_b = 0;
+ }
+ for (h = 0; h < height; ++h) {
+ sse += ComputeSumSquareError(src_a, src_b, width);
+ src_a += stride_a;
+ src_b += stride_b;
+ }
+ return sse;
+}
+
+LIBYUV_API
+double SumSquareErrorToPsnr(uint64_t sse, uint64_t count) {
+ double psnr;
+ if (sse > 0) {
+ double mse = (double)count / (double)sse;
+ psnr = 10.0 * log10(255.0 * 255.0 * mse);
+ } else {
+ psnr = kMaxPsnr; // Limit to prevent divide by 0
+ }
+
+ if (psnr > kMaxPsnr) {
+ psnr = kMaxPsnr;
+ }
+
+ return psnr;
+}
+
+LIBYUV_API
+double CalcFramePsnr(const uint8_t* src_a,
+ int stride_a,
+ const uint8_t* src_b,
+ int stride_b,
+ int width,
+ int height) {
+ const uint64_t samples = (uint64_t)width * (uint64_t)height;
+ const uint64_t sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b,
+ stride_b, width, height);
+ return SumSquareErrorToPsnr(sse, samples);
+}
+
+LIBYUV_API
+double I420Psnr(const uint8_t* src_y_a,
+ int stride_y_a,
+ const uint8_t* src_u_a,
+ int stride_u_a,
+ const uint8_t* src_v_a,
+ int stride_v_a,
+ const uint8_t* src_y_b,
+ int stride_y_b,
+ const uint8_t* src_u_b,
+ int stride_u_b,
+ const uint8_t* src_v_b,
+ int stride_v_b,
+ int width,
+ int height) {
+ const uint64_t sse_y = ComputeSumSquareErrorPlane(
+ src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
+ const int width_uv = (width + 1) >> 1;
+ const int height_uv = (height + 1) >> 1;
+ const uint64_t sse_u = ComputeSumSquareErrorPlane(
+ src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv);
+ const uint64_t sse_v = ComputeSumSquareErrorPlane(
+ src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv);
+ const uint64_t samples = (uint64_t)width * (uint64_t)height +
+ 2 * ((uint64_t)width_uv * (uint64_t)height_uv);
+ const uint64_t sse = sse_y + sse_u + sse_v;
+ return SumSquareErrorToPsnr(sse, samples);
+}
+
+static const int64_t cc1 = 26634; // (64^2*(.01*255)^2
+static const int64_t cc2 = 239708; // (64^2*(.03*255)^2
+
+static double Ssim8x8_C(const uint8_t* src_a,
+ int stride_a,
+ const uint8_t* src_b,
+ int stride_b) {
+ int64_t sum_a = 0;
+ int64_t sum_b = 0;
+ int64_t sum_sq_a = 0;
+ int64_t sum_sq_b = 0;
+ int64_t sum_axb = 0;
+
+ int i;
+ for (i = 0; i < 8; ++i) {
+ int j;
+ for (j = 0; j < 8; ++j) {
+ sum_a += src_a[j];
+ sum_b += src_b[j];
+ sum_sq_a += src_a[j] * src_a[j];
+ sum_sq_b += src_b[j] * src_b[j];
+ sum_axb += src_a[j] * src_b[j];
+ }
+
+ src_a += stride_a;
+ src_b += stride_b;
+ }
+
+ {
+ const int64_t count = 64;
+ // scale the constants by number of pixels
+ const int64_t c1 = (cc1 * count * count) >> 12;
+ const int64_t c2 = (cc2 * count * count) >> 12;
+
+ const int64_t sum_a_x_sum_b = sum_a * sum_b;
+
+ const int64_t ssim_n = (2 * sum_a_x_sum_b + c1) *
+ (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
+
+ const int64_t sum_a_sq = sum_a * sum_a;
+ const int64_t sum_b_sq = sum_b * sum_b;
+
+ const int64_t ssim_d =
+ (sum_a_sq + sum_b_sq + c1) *
+ (count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
+
+ if (ssim_d == 0.0) {
+ return DBL_MAX;
+ }
+ return ssim_n * 1.0 / ssim_d;
+ }
+}
+
+// We are using a 8x8 moving window with starting location of each 8x8 window
+// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
+// block boundaries to penalize blocking artifacts.
+LIBYUV_API
+double CalcFrameSsim(const uint8_t* src_a,
+ int stride_a,
+ const uint8_t* src_b,
+ int stride_b,
+ int width,
+ int height) {
+ int samples = 0;
+ double ssim_total = 0;
+ double (*Ssim8x8)(const uint8_t* src_a, int stride_a, const uint8_t* src_b,
+ int stride_b) = Ssim8x8_C;
+
+ // sample point start with each 4x4 location
+ int i;
+ for (i = 0; i < height - 8; i += 4) {
+ int j;
+ for (j = 0; j < width - 8; j += 4) {
+ ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
+ samples++;
+ }
+
+ src_a += stride_a * 4;
+ src_b += stride_b * 4;
+ }
+
+ ssim_total /= samples;
+ return ssim_total;
+}
+
+LIBYUV_API
+double I420Ssim(const uint8_t* src_y_a,
+ int stride_y_a,
+ const uint8_t* src_u_a,
+ int stride_u_a,
+ const uint8_t* src_v_a,
+ int stride_v_a,
+ const uint8_t* src_y_b,
+ int stride_y_b,
+ const uint8_t* src_u_b,
+ int stride_u_b,
+ const uint8_t* src_v_b,
+ int stride_v_b,
+ int width,
+ int height) {
+ const double ssim_y =
+ CalcFrameSsim(src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
+ const int width_uv = (width + 1) >> 1;
+ const int height_uv = (height + 1) >> 1;
+ const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, src_u_b, stride_u_b,
+ width_uv, height_uv);
+ const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, src_v_b, stride_v_b,
+ width_uv, height_uv);
+ return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_common.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_common.cc
new file mode 100644
index 0000000000..d4b170ad98
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_common.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if ORIGINAL_OPT
+uint32_t HammingDistance_C1(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
+
+ int i;
+ for (i = 0; i < count; ++i) {
+ int x = src_a[i] ^ src_b[i];
+ if (x & 1)
+ ++diff;
+ if (x & 2)
+ ++diff;
+ if (x & 4)
+ ++diff;
+ if (x & 8)
+ ++diff;
+ if (x & 16)
+ ++diff;
+ if (x & 32)
+ ++diff;
+ if (x & 64)
+ ++diff;
+ if (x & 128)
+ ++diff;
+ }
+ return diff;
+}
+#endif
+
+// Hakmem method for hamming distance.
+uint32_t HammingDistance_C(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
+
+ int i;
+ for (i = 0; i < count - 3; i += 4) {
+ uint32_t x = *((const uint32_t*)src_a) ^ *((const uint32_t*)src_b);
+ uint32_t u = x - ((x >> 1) & 0x55555555);
+ u = ((u >> 2) & 0x33333333) + (u & 0x33333333);
+ diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24);
+ src_a += 4;
+ src_b += 4;
+ }
+
+ for (; i < count; ++i) {
+ uint32_t x = *src_a ^ *src_b;
+ uint32_t u = x - ((x >> 1) & 0x55);
+ u = ((u >> 2) & 0x33) + (u & 0x33);
+ diff += (u + (u >> 4)) & 0x0f;
+ src_a += 1;
+ src_b += 1;
+ }
+
+ return diff;
+}
+
+uint32_t SumSquareError_C(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse = 0u;
+ int i;
+ for (i = 0; i < count; ++i) {
+ int diff = src_a[i] - src_b[i];
+ sse += (uint32_t)(diff * diff);
+ }
+ return sse;
+}
+
+// hash seed of 5381 recommended.
+// Internal C version of HashDjb2 with int sized count for efficiency.
+uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed) {
+ uint32_t hash = seed;
+ int i;
+ for (i = 0; i < count; ++i) {
+ hash += (hash << 5) + src[i];
+ }
+ return hash;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_gcc.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_gcc.cc
new file mode 100644
index 0000000000..676527c1b1
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_gcc.cc
@@ -0,0 +1,360 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC x86 and x64.
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
+
+#if defined(__x86_64__)
+uint32_t HammingDistance_SSE42(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint64_t diff = 0u;
+
+ asm volatile(
+ "xor %3,%3 \n"
+ "xor %%r8,%%r8 \n"
+ "xor %%r9,%%r9 \n"
+ "xor %%r10,%%r10 \n"
+
+ // Process 32 bytes per loop.
+ LABELALIGN
+ "1: \n"
+ "mov (%0),%%rcx \n"
+ "mov 0x8(%0),%%rdx \n"
+ "xor (%1),%%rcx \n"
+ "xor 0x8(%1),%%rdx \n"
+ "popcnt %%rcx,%%rcx \n"
+ "popcnt %%rdx,%%rdx \n"
+ "mov 0x10(%0),%%rsi \n"
+ "mov 0x18(%0),%%rdi \n"
+ "xor 0x10(%1),%%rsi \n"
+ "xor 0x18(%1),%%rdi \n"
+ "popcnt %%rsi,%%rsi \n"
+ "popcnt %%rdi,%%rdi \n"
+ "add $0x20,%0 \n"
+ "add $0x20,%1 \n"
+ "add %%rcx,%3 \n"
+ "add %%rdx,%%r8 \n"
+ "add %%rsi,%%r9 \n"
+ "add %%rdi,%%r10 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+
+ "add %%r8, %3 \n"
+ "add %%r9, %3 \n"
+ "add %%r10, %3 \n"
+ : "+r"(src_a), // %0
+ "+r"(src_b), // %1
+ "+r"(count), // %2
+ "=r"(diff) // %3
+ :
+ : "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
+
+ return static_cast<uint32_t>(diff);
+}
+#else
+uint32_t HammingDistance_SSE42(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
+
+ asm volatile(
+ // Process 16 bytes per loop.
+ LABELALIGN
+ "1: \n"
+ "mov (%0),%%ecx \n"
+ "mov 0x4(%0),%%edx \n"
+ "xor (%1),%%ecx \n"
+ "xor 0x4(%1),%%edx \n"
+ "popcnt %%ecx,%%ecx \n"
+ "add %%ecx,%3 \n"
+ "popcnt %%edx,%%edx \n"
+ "add %%edx,%3 \n"
+ "mov 0x8(%0),%%ecx \n"
+ "mov 0xc(%0),%%edx \n"
+ "xor 0x8(%1),%%ecx \n"
+ "xor 0xc(%1),%%edx \n"
+ "popcnt %%ecx,%%ecx \n"
+ "add %%ecx,%3 \n"
+ "popcnt %%edx,%%edx \n"
+ "add %%edx,%3 \n"
+ "add $0x10,%0 \n"
+ "add $0x10,%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_a), // %0
+ "+r"(src_b), // %1
+ "+r"(count), // %2
+ "+r"(diff) // %3
+ :
+ : "memory", "cc", "ecx", "edx");
+
+ return diff;
+}
+#endif
+
+static const vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15};
+static const vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
+
+uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
+
+ asm volatile(
+ "movdqa %4,%%xmm2 \n"
+ "movdqa %5,%%xmm3 \n"
+ "pxor %%xmm0,%%xmm0 \n"
+ "pxor %%xmm1,%%xmm1 \n"
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqa (%0),%%xmm4 \n"
+ "movdqa 0x10(%0), %%xmm5 \n"
+ "pxor (%0,%1), %%xmm4 \n"
+ "movdqa %%xmm4,%%xmm6 \n"
+ "pand %%xmm2,%%xmm6 \n"
+ "psrlw $0x4,%%xmm4 \n"
+ "movdqa %%xmm3,%%xmm7 \n"
+ "pshufb %%xmm6,%%xmm7 \n"
+ "pand %%xmm2,%%xmm4 \n"
+ "movdqa %%xmm3,%%xmm6 \n"
+ "pshufb %%xmm4,%%xmm6 \n"
+ "paddb %%xmm7,%%xmm6 \n"
+ "pxor 0x10(%0,%1),%%xmm5 \n"
+ "add $0x20,%0 \n"
+ "movdqa %%xmm5,%%xmm4 \n"
+ "pand %%xmm2,%%xmm5 \n"
+ "psrlw $0x4,%%xmm4 \n"
+ "movdqa %%xmm3,%%xmm7 \n"
+ "pshufb %%xmm5,%%xmm7 \n"
+ "pand %%xmm2,%%xmm4 \n"
+ "movdqa %%xmm3,%%xmm5 \n"
+ "pshufb %%xmm4,%%xmm5 \n"
+ "paddb %%xmm7,%%xmm5 \n"
+ "paddb %%xmm5,%%xmm6 \n"
+ "psadbw %%xmm1,%%xmm6 \n"
+ "paddd %%xmm6,%%xmm0 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+
+ "pshufd $0xaa,%%xmm0,%%xmm1 \n"
+ "paddd %%xmm1,%%xmm0 \n"
+ "movd %%xmm0, %3 \n"
+ : "+r"(src_a), // %0
+ "+r"(src_b), // %1
+ "+r"(count), // %2
+ "=r"(diff) // %3
+ : "m"(kNibbleMask), // %4
+ "m"(kBitCount) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+
+ return diff;
+}
+
+#ifdef HAS_HAMMINGDISTANCE_AVX2
+uint32_t HammingDistance_AVX2(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
+
+ asm volatile(
+ "vbroadcastf128 %4,%%ymm2 \n"
+ "vbroadcastf128 %5,%%ymm3 \n"
+ "vpxor %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpxor %%ymm1,%%ymm1,%%ymm1 \n"
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqa (%0),%%ymm4 \n"
+ "vmovdqa 0x20(%0), %%ymm5 \n"
+ "vpxor (%0,%1), %%ymm4, %%ymm4 \n"
+ "vpand %%ymm2,%%ymm4,%%ymm6 \n"
+ "vpsrlw $0x4,%%ymm4,%%ymm4 \n"
+ "vpshufb %%ymm6,%%ymm3,%%ymm6 \n"
+ "vpand %%ymm2,%%ymm4,%%ymm4 \n"
+ "vpshufb %%ymm4,%%ymm3,%%ymm4 \n"
+ "vpaddb %%ymm4,%%ymm6,%%ymm6 \n"
+ "vpxor 0x20(%0,%1),%%ymm5,%%ymm4 \n"
+ "add $0x40,%0 \n"
+ "vpand %%ymm2,%%ymm4,%%ymm5 \n"
+ "vpsrlw $0x4,%%ymm4,%%ymm4 \n"
+ "vpshufb %%ymm5,%%ymm3,%%ymm5 \n"
+ "vpand %%ymm2,%%ymm4,%%ymm4 \n"
+ "vpshufb %%ymm4,%%ymm3,%%ymm4 \n"
+ "vpaddb %%ymm5,%%ymm4,%%ymm4 \n"
+ "vpaddb %%ymm6,%%ymm4,%%ymm4 \n"
+ "vpsadbw %%ymm1,%%ymm4,%%ymm4 \n"
+ "vpaddd %%ymm0,%%ymm4,%%ymm0 \n"
+ "sub $0x40,%2 \n"
+ "jg 1b \n"
+
+ "vpermq $0xb1,%%ymm0,%%ymm1 \n"
+ "vpaddd %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xaa,%%ymm0,%%ymm1 \n"
+ "vpaddd %%ymm1,%%ymm0,%%ymm0 \n"
+ "vmovd %%xmm0, %3 \n"
+ "vzeroupper \n"
+ : "+r"(src_a), // %0
+ "+r"(src_b), // %1
+ "+r"(count), // %2
+ "=r"(diff) // %3
+ : "m"(kNibbleMask), // %4
+ "m"(kBitCount) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+
+ return diff;
+}
+#endif // HAS_HAMMINGDISTANCE_AVX2
+
+uint32_t SumSquareError_SSE2(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse;
+ asm volatile(
+ "pxor %%xmm0,%%xmm0 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqu (%1),%%xmm2 \n"
+ "lea 0x10(%1),%1 \n"
+ "movdqa %%xmm1,%%xmm3 \n"
+ "psubusb %%xmm2,%%xmm1 \n"
+ "psubusb %%xmm3,%%xmm2 \n"
+ "por %%xmm2,%%xmm1 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "punpckhbw %%xmm5,%%xmm2 \n"
+ "pmaddwd %%xmm1,%%xmm1 \n"
+ "pmaddwd %%xmm2,%%xmm2 \n"
+ "paddd %%xmm1,%%xmm0 \n"
+ "paddd %%xmm2,%%xmm0 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+
+ "pshufd $0xee,%%xmm0,%%xmm1 \n"
+ "paddd %%xmm1,%%xmm0 \n"
+ "pshufd $0x1,%%xmm0,%%xmm1 \n"
+ "paddd %%xmm1,%%xmm0 \n"
+ "movd %%xmm0,%3 \n"
+
+ : "+r"(src_a), // %0
+ "+r"(src_b), // %1
+ "+r"(count), // %2
+ "=g"(sse) // %3
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+ return sse;
+}
+
+static const uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16
+static const uvec32 kHashMul0 = {
+ 0x0c3525e1, // 33 ^ 15
+ 0xa3476dc1, // 33 ^ 14
+ 0x3b4039a1, // 33 ^ 13
+ 0x4f5f0981, // 33 ^ 12
+};
+static const uvec32 kHashMul1 = {
+ 0x30f35d61, // 33 ^ 11
+ 0x855cb541, // 33 ^ 10
+ 0x040a9121, // 33 ^ 9
+ 0x747c7101, // 33 ^ 8
+};
+static const uvec32 kHashMul2 = {
+ 0xec41d4e1, // 33 ^ 7
+ 0x4cfa3cc1, // 33 ^ 6
+ 0x025528a1, // 33 ^ 5
+ 0x00121881, // 33 ^ 4
+};
+static const uvec32 kHashMul3 = {
+ 0x00008c61, // 33 ^ 3
+ 0x00000441, // 33 ^ 2
+ 0x00000021, // 33 ^ 1
+ 0x00000001, // 33 ^ 0
+};
+
+uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
+ uint32_t hash;
+ asm volatile(
+ "movd %2,%%xmm0 \n"
+ "pxor %%xmm7,%%xmm7 \n"
+ "movdqa %4,%%xmm6 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "pmulld %%xmm6,%%xmm0 \n"
+ "movdqa %5,%%xmm5 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "punpcklbw %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "punpcklwd %%xmm7,%%xmm3 \n"
+ "pmulld %%xmm5,%%xmm3 \n"
+ "movdqa %6,%%xmm5 \n"
+ "movdqa %%xmm2,%%xmm4 \n"
+ "punpckhwd %%xmm7,%%xmm4 \n"
+ "pmulld %%xmm5,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "punpckhbw %%xmm7,%%xmm1 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "punpcklwd %%xmm7,%%xmm2 \n"
+ "pmulld %%xmm5,%%xmm2 \n"
+ "movdqa %8,%%xmm5 \n"
+ "punpckhwd %%xmm7,%%xmm1 \n"
+ "pmulld %%xmm5,%%xmm1 \n"
+ "paddd %%xmm4,%%xmm3 \n"
+ "paddd %%xmm2,%%xmm1 \n"
+ "paddd %%xmm3,%%xmm1 \n"
+ "pshufd $0xe,%%xmm1,%%xmm2 \n"
+ "paddd %%xmm2,%%xmm1 \n"
+ "pshufd $0x1,%%xmm1,%%xmm2 \n"
+ "paddd %%xmm2,%%xmm1 \n"
+ "paddd %%xmm1,%%xmm0 \n"
+ "sub $0x10,%1 \n"
+ "jg 1b \n"
+ "movd %%xmm0,%3 \n"
+ : "+r"(src), // %0
+ "+r"(count), // %1
+ "+rm"(seed), // %2
+ "=g"(hash) // %3
+ : "m"(kHash16x33), // %4
+ "m"(kHashMul0), // %5
+ "m"(kHashMul1), // %6
+ "m"(kHashMul2), // %7
+ "m"(kHashMul3) // %8
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+ return hash;
+}
+#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_msa.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_msa.cc
new file mode 100644
index 0000000000..0b807d37be
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_msa.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2017 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
+#include "libyuv/row.h"
+
+// This module is for GCC MSA
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#include "libyuv/macros_msa.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+uint32_t HammingDistance_MSA(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
+ int i;
+ v16u8 src0, src1, src2, src3;
+ v2i64 vec0 = {0}, vec1 = {0};
+
+ for (i = 0; i < count; i += 32) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
+ src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
+ src0 ^= src2;
+ src1 ^= src3;
+ vec0 += __msa_pcnt_d((v2i64)src0);
+ vec1 += __msa_pcnt_d((v2i64)src1);
+ src_a += 32;
+ src_b += 32;
+ }
+
+ vec0 += vec1;
+ diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0);
+ diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2);
+ return diff;
+}
+
+uint32_t SumSquareError_MSA(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse = 0u;
+ int i;
+ v16u8 src0, src1, src2, src3;
+ v8i16 vec0, vec1, vec2, vec3;
+ v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0};
+ v2i64 tmp0;
+
+ for (i = 0; i < count; i += 32) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
+ src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
+ vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
+ vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
+ vec2 = (v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
+ vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
+ vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0);
+ vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1);
+ vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2);
+ vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3);
+ reg0 = __msa_dpadd_s_w(reg0, vec0, vec0);
+ reg1 = __msa_dpadd_s_w(reg1, vec1, vec1);
+ reg2 = __msa_dpadd_s_w(reg2, vec2, vec2);
+ reg3 = __msa_dpadd_s_w(reg3, vec3, vec3);
+ src_a += 32;
+ src_b += 32;
+ }
+
+ reg0 += reg1;
+ reg2 += reg3;
+ reg0 += reg2;
+ tmp0 = __msa_hadd_s_d(reg0, reg0);
+ sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0);
+ sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2);
+ return sse;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_neon.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_neon.cc
new file mode 100644
index 0000000000..2a2181e0cb
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_neon.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
+ !defined(__aarch64__)
+
+// 256 bits at a time
+// uses short accumulator which restricts count to 131 KB
+uint32_t HammingDistance_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff;
+
+ asm volatile(
+ "vmov.u16 q4, #0 \n" // accumulator
+
+ "1: \n"
+ "vld1.8 {q0, q1}, [%0]! \n"
+ "vld1.8 {q2, q3}, [%1]! \n"
+ "veor.32 q0, q0, q2 \n"
+ "veor.32 q1, q1, q3 \n"
+ "vcnt.i8 q0, q0 \n"
+ "vcnt.i8 q1, q1 \n"
+ "subs %2, %2, #32 \n"
+ "vadd.u8 q0, q0, q1 \n" // 16 byte counts
+ "vpadal.u8 q4, q0 \n" // 8 shorts
+ "bgt 1b \n"
+
+ "vpaddl.u16 q0, q4 \n" // 4 ints
+ "vpadd.u32 d0, d0, d1 \n"
+ "vpadd.u32 d0, d0, d0 \n"
+ "vmov.32 %3, d0[0] \n"
+
+ : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff)
+ :
+ : "cc", "q0", "q1", "q2", "q3", "q4");
+ return diff;
+}
+
+uint32_t SumSquareError_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse;
+ asm volatile(
+ "vmov.u8 q8, #0 \n"
+ "vmov.u8 q10, #0 \n"
+ "vmov.u8 q9, #0 \n"
+ "vmov.u8 q11, #0 \n"
+
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n"
+ "vld1.8 {q1}, [%1]! \n"
+ "subs %2, %2, #16 \n"
+ "vsubl.u8 q2, d0, d2 \n"
+ "vsubl.u8 q3, d1, d3 \n"
+ "vmlal.s16 q8, d4, d4 \n"
+ "vmlal.s16 q9, d6, d6 \n"
+ "vmlal.s16 q10, d5, d5 \n"
+ "vmlal.s16 q11, d7, d7 \n"
+ "bgt 1b \n"
+
+ "vadd.u32 q8, q8, q9 \n"
+ "vadd.u32 q10, q10, q11 \n"
+ "vadd.u32 q11, q8, q10 \n"
+ "vpaddl.u32 q1, q11 \n"
+ "vadd.u64 d0, d2, d3 \n"
+ "vmov.32 %3, d0[0] \n"
+ : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse)
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
+ return sse;
+}
+
+#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_neon64.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_neon64.cc
new file mode 100644
index 0000000000..6e8f672ab7
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_neon64.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+// 256 bits at a time
+// uses short accumulator which restricts count to 131 KB
+uint32_t HammingDistance_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff;
+ asm volatile(
+ "movi v4.8h, #0 \n"
+
+ "1: \n"
+ "ld1 {v0.16b, v1.16b}, [%0], #32 \n"
+ "ld1 {v2.16b, v3.16b}, [%1], #32 \n"
+ "eor v0.16b, v0.16b, v2.16b \n"
+ "eor v1.16b, v1.16b, v3.16b \n"
+ "cnt v0.16b, v0.16b \n"
+ "cnt v1.16b, v1.16b \n"
+ "subs %w2, %w2, #32 \n"
+ "add v0.16b, v0.16b, v1.16b \n"
+ "uadalp v4.8h, v0.16b \n"
+ "b.gt 1b \n"
+
+ "uaddlv s4, v4.8h \n"
+ "fmov %w3, s4 \n"
+ : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff)
+ :
+ : "cc", "v0", "v1", "v2", "v3", "v4");
+ return diff;
+}
+
+uint32_t SumSquareError_NEON(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t sse;
+ asm volatile(
+ "eor v16.16b, v16.16b, v16.16b \n"
+ "eor v18.16b, v18.16b, v18.16b \n"
+ "eor v17.16b, v17.16b, v17.16b \n"
+ "eor v19.16b, v19.16b, v19.16b \n"
+
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n"
+ "ld1 {v1.16b}, [%1], #16 \n"
+ "subs %w2, %w2, #16 \n"
+ "usubl v2.8h, v0.8b, v1.8b \n"
+ "usubl2 v3.8h, v0.16b, v1.16b \n"
+ "smlal v16.4s, v2.4h, v2.4h \n"
+ "smlal v17.4s, v3.4h, v3.4h \n"
+ "smlal2 v18.4s, v2.8h, v2.8h \n"
+ "smlal2 v19.4s, v3.8h, v3.8h \n"
+ "b.gt 1b \n"
+
+ "add v16.4s, v16.4s, v17.4s \n"
+ "add v18.4s, v18.4s, v19.4s \n"
+ "add v19.4s, v16.4s, v18.4s \n"
+ "addv s0, v19.4s \n"
+ "fmov %w3, s0 \n"
+ : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse)
+ :
+ : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
+ return sse;
+}
+
+#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_win.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_win.cc
new file mode 100644
index 0000000000..d57d3d9d1c
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_win.cc
@@ -0,0 +1,241 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
+#include "libyuv/row.h"
+
+#if defined(_MSC_VER)
+#include <intrin.h> // For __popcnt
+#endif
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for 32 bit Visual C x86 and clangcl
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+
+uint32_t HammingDistance_SSE42(const uint8_t* src_a,
+ const uint8_t* src_b,
+ int count) {
+ uint32_t diff = 0u;
+
+ int i;
+ for (i = 0; i < count - 3; i += 4) {
+ uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT
+ src_a += 4;
+ src_b += 4;
+ diff += __popcnt(x);
+ }
+ return diff;
+}
+
+__declspec(naked) uint32_t
+ SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) {
+ __asm {
+ mov eax, [esp + 4] // src_a
+ mov edx, [esp + 8] // src_b
+ mov ecx, [esp + 12] // count
+ pxor xmm0, xmm0
+ pxor xmm5, xmm5
+
+ wloop:
+ movdqu xmm1, [eax]
+ lea eax, [eax + 16]
+ movdqu xmm2, [edx]
+ lea edx, [edx + 16]
+ movdqa xmm3, xmm1 // abs trick
+ psubusb xmm1, xmm2
+ psubusb xmm2, xmm3
+ por xmm1, xmm2
+ movdqa xmm2, xmm1
+ punpcklbw xmm1, xmm5
+ punpckhbw xmm2, xmm5
+ pmaddwd xmm1, xmm1
+ pmaddwd xmm2, xmm2
+ paddd xmm0, xmm1
+ paddd xmm0, xmm2
+ sub ecx, 16
+ jg wloop
+
+ pshufd xmm1, xmm0, 0xee
+ paddd xmm0, xmm1
+ pshufd xmm1, xmm0, 0x01
+ paddd xmm0, xmm1
+ movd eax, xmm0
+ ret
+ }
+}
+
+// Visual C 2012 required for AVX2.
+#if _MSC_VER >= 1700
+// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
+#pragma warning(disable : 4752)
+__declspec(naked) uint32_t
+ SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) {
+ __asm {
+ mov eax, [esp + 4] // src_a
+ mov edx, [esp + 8] // src_b
+ mov ecx, [esp + 12] // count
+ vpxor ymm0, ymm0, ymm0 // sum
+ vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
+ sub edx, eax
+
+ wloop:
+ vmovdqu ymm1, [eax]
+ vmovdqu ymm2, [eax + edx]
+ lea eax, [eax + 32]
+ vpsubusb ymm3, ymm1, ymm2 // abs difference trick
+ vpsubusb ymm2, ymm2, ymm1
+ vpor ymm1, ymm2, ymm3
+ vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order.
+ vpunpckhbw ymm1, ymm1, ymm5
+ vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32.
+ vpmaddwd ymm1, ymm1, ymm1
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm0, ymm0, ymm2
+ sub ecx, 32
+ jg wloop
+
+ vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
+ vpaddd ymm0, ymm0, ymm1
+ vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes.
+ vpaddd ymm0, ymm0, ymm1
+ vpermq ymm1, ymm0, 0x02 // high + low lane.
+ vpaddd ymm0, ymm0, ymm1
+ vmovd eax, xmm0
+ vzeroupper
+ ret
+ }
+}
+#endif // _MSC_VER >= 1700
+
+uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16
+uvec32 kHashMul0 = {
+ 0x0c3525e1, // 33 ^ 15
+ 0xa3476dc1, // 33 ^ 14
+ 0x3b4039a1, // 33 ^ 13
+ 0x4f5f0981, // 33 ^ 12
+};
+uvec32 kHashMul1 = {
+ 0x30f35d61, // 33 ^ 11
+ 0x855cb541, // 33 ^ 10
+ 0x040a9121, // 33 ^ 9
+ 0x747c7101, // 33 ^ 8
+};
+uvec32 kHashMul2 = {
+ 0xec41d4e1, // 33 ^ 7
+ 0x4cfa3cc1, // 33 ^ 6
+ 0x025528a1, // 33 ^ 5
+ 0x00121881, // 33 ^ 4
+};
+uvec32 kHashMul3 = {
+ 0x00008c61, // 33 ^ 3
+ 0x00000441, // 33 ^ 2
+ 0x00000021, // 33 ^ 1
+ 0x00000001, // 33 ^ 0
+};
+
+__declspec(naked) uint32_t
+ HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov ecx, [esp + 8] // count
+ movd xmm0, [esp + 12] // seed
+
+ pxor xmm7, xmm7 // constant 0 for unpck
+ movdqa xmm6, xmmword ptr kHash16x33
+
+ wloop:
+ movdqu xmm1, [eax] // src[0-15]
+ lea eax, [eax + 16]
+ pmulld xmm0, xmm6 // hash *= 33 ^ 16
+ movdqa xmm5, xmmword ptr kHashMul0
+ movdqa xmm2, xmm1
+ punpcklbw xmm2, xmm7 // src[0-7]
+ movdqa xmm3, xmm2
+ punpcklwd xmm3, xmm7 // src[0-3]
+ pmulld xmm3, xmm5
+ movdqa xmm5, xmmword ptr kHashMul1
+ movdqa xmm4, xmm2
+ punpckhwd xmm4, xmm7 // src[4-7]
+ pmulld xmm4, xmm5
+ movdqa xmm5, xmmword ptr kHashMul2
+ punpckhbw xmm1, xmm7 // src[8-15]
+ movdqa xmm2, xmm1
+ punpcklwd xmm2, xmm7 // src[8-11]
+ pmulld xmm2, xmm5
+ movdqa xmm5, xmmword ptr kHashMul3
+ punpckhwd xmm1, xmm7 // src[12-15]
+ pmulld xmm1, xmm5
+ paddd xmm3, xmm4 // add 16 results
+ paddd xmm1, xmm2
+ paddd xmm1, xmm3
+
+ pshufd xmm2, xmm1, 0x0e // upper 2 dwords
+ paddd xmm1, xmm2
+ pshufd xmm2, xmm1, 0x01
+ paddd xmm1, xmm2
+ paddd xmm0, xmm1
+ sub ecx, 16
+ jg wloop
+
+ movd eax, xmm0 // return hash
+ ret
+ }
+}
+
+// Visual C 2012 required for AVX2.
+#if _MSC_VER >= 1700
+__declspec(naked) uint32_t
+ HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov ecx, [esp + 8] // count
+ vmovd xmm0, [esp + 12] // seed
+
+ wloop:
+ vpmovzxbd xmm3, [eax] // src[0-3]
+ vpmulld xmm0, xmm0, xmmword ptr kHash16x33 // hash *= 33 ^ 16
+ vpmovzxbd xmm4, [eax + 4] // src[4-7]
+ vpmulld xmm3, xmm3, xmmword ptr kHashMul0
+ vpmovzxbd xmm2, [eax + 8] // src[8-11]
+ vpmulld xmm4, xmm4, xmmword ptr kHashMul1
+ vpmovzxbd xmm1, [eax + 12] // src[12-15]
+ vpmulld xmm2, xmm2, xmmword ptr kHashMul2
+ lea eax, [eax + 16]
+ vpmulld xmm1, xmm1, xmmword ptr kHashMul3
+ vpaddd xmm3, xmm3, xmm4 // add 16 results
+ vpaddd xmm1, xmm1, xmm2
+ vpaddd xmm1, xmm1, xmm3
+ vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
+ vpaddd xmm1, xmm1,xmm2
+ vpshufd xmm2, xmm1, 0x01
+ vpaddd xmm1, xmm1, xmm2
+ vpaddd xmm0, xmm0, xmm1
+ sub ecx, 16
+ jg wloop
+
+ vmovd eax, xmm0 // return hash
+ vzeroupper
+ ret
+ }
+}
+#endif // _MSC_VER >= 1700
+
+#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert.cc
new file mode 100644
index 0000000000..375cc732c1
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/convert.cc
@@ -0,0 +1,1740 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert.h"
+
+#include "libyuv/basic_types.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+#include "libyuv/row.h"
+#include "libyuv/scale.h" // For ScalePlane()
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
+static __inline int Abs(int v) {
+ return v >= 0 ? v : -v;
+}
+
+// Any I4xx To I420 format with mirroring.
+static int I4xxToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int src_y_width,
+ int src_y_height,
+ int src_uv_width,
+ int src_uv_height) {
+ const int dst_y_width = Abs(src_y_width);
+ const int dst_y_height = Abs(src_y_height);
+ const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
+ const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
+ if (src_uv_width == 0 || src_uv_height == 0) {
+ return -1;
+ }
+ if (dst_y) {
+ ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
+ dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+ }
+ ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+ dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+ ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+ dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+ return 0;
+}
+
+// Copy I420 with optional flipping.
+// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure
+// is does row coalescing.
+LIBYUV_API
+int I420Copy(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (halfheight - 1) * src_stride_u;
+ src_v = src_v + (halfheight - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ if (dst_y) {
+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+ // Copy UV planes.
+ CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+ CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+ return 0;
+}
+
+// Copy I010 with optional flipping.
+LIBYUV_API
+int I010Copy(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (halfheight - 1) * src_stride_u;
+ src_v = src_v + (halfheight - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ if (dst_y) {
+ CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+ // Copy UV planes.
+ CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+ CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+ return 0;
+}
+
+// Convert 10 bit YUV to 8 bit.
+LIBYUV_API
+int I010ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (halfheight - 1) * src_stride_u;
+ src_v = src_v + (halfheight - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ // Convert Y plane.
+ Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, 16384, width,
+ height);
+ // Convert UV planes.
+ Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, 16384, halfwidth,
+ halfheight);
+ Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, 16384, halfwidth,
+ halfheight);
+ return 0;
+}
+
+// 422 chroma is 1/2 width, 1x height
+// 420 chroma is 1/2 width, 1/2 height
+LIBYUV_API
+int I422ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ const int src_uv_width = SUBSAMPLE(width, 1, 1);
+ return I4xxToI420(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, src_uv_width, height);
+}
+
+// 444 chroma is 1x width, 1x height
+// 420 chroma is 1/2 width, 1/2 height
+LIBYUV_API
+int I444ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return I4xxToI420(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, width, height);
+}
+
+// I400 is greyscale typically used in MJPG
+LIBYUV_API
+int I400ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_stride_y = -src_stride_y;
+ }
+ if (dst_y) {
+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+ SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128);
+ SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128);
+ return 0;
+}
+
+static void CopyPlane2(const uint8_t* src,
+ int src_stride_0,
+ int src_stride_1,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ int y;
+ void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
+#if defined(HAS_COPYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
+ }
+#endif
+#if defined(HAS_COPYROW_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
+ }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+ if (TestCpuFlag(kCpuHasERMS)) {
+ CopyRow = CopyRow_ERMS;
+ }
+#endif
+#if defined(HAS_COPYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
+ }
+#endif
+
+ // Copy plane
+ for (y = 0; y < height - 1; y += 2) {
+ CopyRow(src, dst, width);
+ CopyRow(src + src_stride_0, dst + dst_stride, width);
+ src += src_stride_0 + src_stride_1;
+ dst += dst_stride * 2;
+ }
+ if (height & 1) {
+ CopyRow(src, dst, width);
+ }
+}
+
+// Support converting from FOURCC_M420
+// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for
+// easy conversion to I420.
+// M420 format description:
+// M420 is row biplanar 420: 2 rows of Y and 1 row of UV.
+// Chroma is half width / half height. (420)
+// src_stride_m420 is row planar. Normally this will be the width in pixels.
+// The UV plane is half width, but 2 values, so src_stride_m420 applies to
+// this as well as the two Y planes.
+static int X420ToI420(const uint8_t* src_y,
+ int src_stride_y0,
+ int src_stride_y1,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_uv || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ if (dst_y) {
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ }
+ dst_u = dst_u + (halfheight - 1) * dst_stride_u;
+ dst_v = dst_v + (halfheight - 1) * dst_stride_v;
+ dst_stride_y = -dst_stride_y;
+ dst_stride_u = -dst_stride_u;
+ dst_stride_v = -dst_stride_v;
+ }
+ // Coalesce rows.
+ if (src_stride_y0 == width && src_stride_y1 == width &&
+ dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y0 = src_stride_y1 = dst_stride_y = 0;
+ }
+ // Coalesce rows.
+ if (src_stride_uv == halfwidth * 2 && dst_stride_u == halfwidth &&
+ dst_stride_v == halfwidth) {
+ halfwidth *= halfheight;
+ halfheight = 1;
+ src_stride_uv = dst_stride_u = dst_stride_v = 0;
+ }
+
+ if (dst_y) {
+ if (src_stride_y0 == src_stride_y1) {
+ CopyPlane(src_y, src_stride_y0, dst_y, dst_stride_y, width, height);
+ } else {
+ CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y,
+ width, height);
+ }
+ }
+
+ // Split UV plane - NV12 / NV21
+ SplitUVPlane(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, dst_stride_v,
+ halfwidth, halfheight);
+
+ return 0;
+}
+
+// Convert NV12 to I420.
+LIBYUV_API
+int NV12ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return X420ToI420(src_y, src_stride_y, src_stride_y, src_uv, src_stride_uv,
+ dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, width, height);
+}
+
+// Convert NV21 to I420. Same as NV12 but u and v pointers swapped.
+LIBYUV_API
+int NV21ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return X420ToI420(src_y, src_stride_y, src_stride_y, src_vu, src_stride_vu,
+ dst_y, dst_stride_y, dst_v, dst_stride_v, dst_u,
+ dst_stride_u, width, height);
+}
+
+// Convert M420 to I420.
+LIBYUV_API
+int M420ToI420(const uint8_t* src_m420,
+ int src_stride_m420,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2,
+ src_m420 + src_stride_m420 * 2, src_stride_m420 * 3, dst_y,
+ dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
+ width, height);
+}
+
+// Convert YUY2 to I420.
+LIBYUV_API
+int YUY2ToI420(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*YUY2ToUVRow)(const uint8_t* src_yuy2, int src_stride_yuy2,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ YUY2ToUVRow_C;
+ void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
+ YUY2ToYRow_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+ src_stride_yuy2 = -src_stride_yuy2;
+ }
+#if defined(HAS_YUY2TOYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
+ YUY2ToYRow = YUY2ToYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToUVRow = YUY2ToUVRow_SSE2;
+ YUY2ToYRow = YUY2ToYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ YUY2ToUVRow = YUY2ToUVRow_Any_AVX2;
+ YUY2ToYRow = YUY2ToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ YUY2ToUVRow = YUY2ToUVRow_AVX2;
+ YUY2ToYRow = YUY2ToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ YUY2ToYRow = YUY2ToYRow_Any_NEON;
+ YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_NEON;
+ YUY2ToUVRow = YUY2ToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ YUY2ToYRow = YUY2ToYRow_Any_MSA;
+ YUY2ToUVRow = YUY2ToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ YUY2ToYRow = YUY2ToYRow_MSA;
+ YUY2ToUVRow = YUY2ToUVRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width);
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
+ src_yuy2 += src_stride_yuy2 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+ YUY2ToUVRow(src_yuy2, 0, dst_u, dst_v, width);
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ }
+ return 0;
+}
+
+// Convert UYVY to I420.
+LIBYUV_API
+int UYVYToI420(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*UYVYToUVRow)(const uint8_t* src_uyvy, int src_stride_uyvy,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ UYVYToUVRow_C;
+ void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
+ UYVYToYRow_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
+ src_stride_uyvy = -src_stride_uyvy;
+ }
+#if defined(HAS_UYVYTOYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ UYVYToUVRow = UYVYToUVRow_Any_SSE2;
+ UYVYToYRow = UYVYToYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToUVRow = UYVYToUVRow_SSE2;
+ UYVYToYRow = UYVYToYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_UYVYTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ UYVYToUVRow = UYVYToUVRow_Any_AVX2;
+ UYVYToYRow = UYVYToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ UYVYToUVRow = UYVYToUVRow_AVX2;
+ UYVYToYRow = UYVYToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_UYVYTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ UYVYToYRow = UYVYToYRow_Any_NEON;
+ UYVYToUVRow = UYVYToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToYRow = UYVYToYRow_NEON;
+ UYVYToUVRow = UYVYToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_UYVYTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ UYVYToYRow = UYVYToYRow_Any_MSA;
+ UYVYToUVRow = UYVYToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ UYVYToYRow = UYVYToYRow_MSA;
+ UYVYToUVRow = UYVYToUVRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width);
+ UYVYToYRow(src_uyvy, dst_y, width);
+ UYVYToYRow(src_uyvy + src_stride_uyvy, dst_y + dst_stride_y, width);
+ src_uyvy += src_stride_uyvy * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+ UYVYToUVRow(src_uyvy, 0, dst_u, dst_v, width);
+ UYVYToYRow(src_uyvy, dst_y, width);
+ }
+ return 0;
+}
+
+// Convert ARGB to I420.
+LIBYUV_API
+int ARGBToI420(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+ if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToUVRow = ARGBToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
+ ARGBToYRow(src_argb, dst_y, width);
+ ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+ src_argb += src_stride_argb * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+ ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
+ ARGBToYRow(src_argb, dst_y, width);
+ }
+ return 0;
+}
+
+// Convert BGRA to I420.
+LIBYUV_API
+int BGRAToI420(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ BGRAToUVRow_C;
+ void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) =
+ BGRAToYRow_C;
+ if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_bgra = src_bgra + (height - 1) * src_stride_bgra;
+ src_stride_bgra = -src_stride_bgra;
+ }
+#if defined(HAS_BGRATOYROW_SSSE3) && defined(HAS_BGRATOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
+ BGRAToYRow = BGRAToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ BGRAToUVRow = BGRAToUVRow_SSSE3;
+ BGRAToYRow = BGRAToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ BGRAToYRow = BGRAToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ BGRAToYRow = BGRAToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ BGRAToUVRow = BGRAToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ BGRAToUVRow = BGRAToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ BGRAToYRow = BGRAToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ BGRAToYRow = BGRAToYRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ BGRAToUVRow = BGRAToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ BGRAToUVRow = BGRAToUVRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
+ BGRAToYRow(src_bgra, dst_y, width);
+ BGRAToYRow(src_bgra + src_stride_bgra, dst_y + dst_stride_y, width);
+ src_bgra += src_stride_bgra * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+ BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width);
+ BGRAToYRow(src_bgra, dst_y, width);
+ }
+ return 0;
+}
+
+// Convert ABGR to I420.
+LIBYUV_API
+int ABGRToI420(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ABGRToUVRow_C;
+ void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
+ ABGRToYRow_C;
+ if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_abgr = src_abgr + (height - 1) * src_stride_abgr;
+ src_stride_abgr = -src_stride_abgr;
+ }
+#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
+ ABGRToYRow = ABGRToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVRow = ABGRToUVRow_SSSE3;
+ ABGRToYRow = ABGRToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ABGRToYRow = ABGRToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ABGRToYRow = ABGRToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ABGRToUVRow = ABGRToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVRow = ABGRToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ABGRToYRow = ABGRToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYRow = ABGRToYRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ABGRToUVRow = ABGRToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVRow = ABGRToUVRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
+ ABGRToYRow(src_abgr, dst_y, width);
+ ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
+ src_abgr += src_stride_abgr * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+ ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width);
+ ABGRToYRow(src_abgr, dst_y, width);
+ }
+ return 0;
+}
+
+// Convert RGBA to I420.
+LIBYUV_API
+int RGBAToI420(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ RGBAToUVRow_C;
+ void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) =
+ RGBAToYRow_C;
+ if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_rgba = src_rgba + (height - 1) * src_stride_rgba;
+ src_stride_rgba = -src_stride_rgba;
+ }
+#if defined(HAS_RGBATOYROW_SSSE3) && defined(HAS_RGBATOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
+ RGBAToYRow = RGBAToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToUVRow = RGBAToUVRow_SSSE3;
+ RGBAToYRow = RGBAToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGBAToYRow = RGBAToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RGBAToYRow = RGBAToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGBAToUVRow = RGBAToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToUVRow = RGBAToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RGBAToYRow = RGBAToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToYRow = RGBAToYRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RGBAToUVRow = RGBAToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToUVRow = RGBAToUVRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
+ RGBAToYRow(src_rgba, dst_y, width);
+ RGBAToYRow(src_rgba + src_stride_rgba, dst_y + dst_stride_y, width);
+ src_rgba += src_stride_rgba * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+ RGBAToUVRow(src_rgba, 0, dst_u, dst_v, width);
+ RGBAToYRow(src_rgba, dst_y, width);
+ }
+ return 0;
+}
+
+// Convert RGB24 to I420.
+LIBYUV_API
+int RGB24ToI420(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA))
+ void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ RGB24ToUVRow_C;
+ void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
+ RGB24ToYRow_C;
+#else
+ void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+ RGB24ToARGBRow_C;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+#endif
+ if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
+ src_stride_rgb24 = -src_stride_rgb24;
+ }
+
+// Neon version does direct RGB24 to YUV.
+#if defined(HAS_RGB24TOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
+ RGB24ToYRow = RGB24ToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RGB24ToYRow = RGB24ToYRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToUVRow = RGB24ToUVRow_NEON;
+ }
+ }
+ }
+#elif defined(HAS_RGB24TOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RGB24ToUVRow = RGB24ToUVRow_Any_MSA;
+ RGB24ToYRow = RGB24ToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToYRow = RGB24ToYRow_MSA;
+ RGB24ToUVRow = RGB24ToUVRow_MSA;
+ }
+ }
+// Other platforms do intermediate conversion from RGB24 to ARGB.
+#else
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#endif
+
+ {
+#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA))
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA))
+ RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
+ RGB24ToYRow(src_rgb24, dst_y, width);
+ RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
+#else
+ RGB24ToARGBRow(src_rgb24, row, width);
+ RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_rgb24 += src_stride_rgb24 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA))
+ RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
+ RGB24ToYRow(src_rgb24, dst_y, width);
+#else
+ RGB24ToARGBRow(src_rgb24, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA))
+ free_aligned_buffer_64(row);
+#endif
+ }
+ return 0;
+}
+
+// Convert RAW to I420.
+LIBYUV_API
+int RAWToI420(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA))
+ void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
+ uint8_t* dst_v, int width) = RAWToUVRow_C;
+ void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
+ RAWToYRow_C;
+#else
+ void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+ RAWToARGBRow_C;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+#endif
+ if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_raw = src_raw + (height - 1) * src_stride_raw;
+ src_stride_raw = -src_stride_raw;
+ }
+
+// Neon version does direct RAW to YUV.
+#if defined(HAS_RAWTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RAWToUVRow = RAWToUVRow_Any_NEON;
+ RAWToYRow = RAWToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RAWToYRow = RAWToYRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToUVRow = RAWToUVRow_NEON;
+ }
+ }
+ }
+#elif defined(HAS_RAWTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RAWToUVRow = RAWToUVRow_Any_MSA;
+ RAWToYRow = RAWToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToYRow = RAWToYRow_MSA;
+ RAWToUVRow = RAWToUVRow_MSA;
+ }
+ }
+// Other platforms do intermediate conversion from RAW to ARGB.
+#else
+#if defined(HAS_RAWTOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToARGBRow = RAWToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#endif
+
+ {
+#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA))
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA))
+ RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
+ RAWToYRow(src_raw, dst_y, width);
+ RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
+#else
+ RAWToARGBRow(src_raw, row, width);
+ RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_raw += src_stride_raw * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA))
+ RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
+ RAWToYRow(src_raw, dst_y, width);
+#else
+ RAWToARGBRow(src_raw, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA))
+ free_aligned_buffer_64(row);
+#endif
+ }
+ return 0;
+}
+
+// Convert RGB565 to I420.
+LIBYUV_API
+int RGB565ToI420(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA))
+ void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ RGB565ToUVRow_C;
+ void (*RGB565ToYRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width) =
+ RGB565ToYRow_C;
+#else
+ void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
+ int width) = RGB565ToARGBRow_C;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+#endif
+ if (!src_rgb565 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
+ src_stride_rgb565 = -src_stride_rgb565;
+ }
+
+// Neon version does direct RGB565 to YUV.
+#if defined(HAS_RGB565TOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
+ RGB565ToYRow = RGB565ToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RGB565ToYRow = RGB565ToYRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ RGB565ToUVRow = RGB565ToUVRow_NEON;
+ }
+ }
+ }
+#elif defined(HAS_RGB565TOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RGB565ToUVRow = RGB565ToUVRow_Any_MSA;
+ RGB565ToYRow = RGB565ToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RGB565ToYRow = RGB565ToYRow_MSA;
+ RGB565ToUVRow = RGB565ToUVRow_MSA;
+ }
+ }
+// Other platforms do intermediate conversion from RGB565 to ARGB.
+#else
+#if defined(HAS_RGB565TOARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_RGB565TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#endif
+ {
+#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA))
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+#endif
+ for (y = 0; y < height - 1; y += 2) {
+#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA))
+ RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
+ RGB565ToYRow(src_rgb565, dst_y, width);
+ RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
+#else
+ RGB565ToARGBRow(src_rgb565, row, width);
+ RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_rgb565 += src_stride_rgb565 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA))
+ RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
+ RGB565ToYRow(src_rgb565, dst_y, width);
+#else
+ RGB565ToARGBRow(src_rgb565, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA))
+ free_aligned_buffer_64(row);
+#endif
+ }
+ return 0;
+}
+
+// Convert ARGB1555 to I420.
+LIBYUV_API
+int ARGB1555ToI420(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA))
+ void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGB1555ToUVRow_C;
+ void (*ARGB1555ToYRow)(const uint8_t* src_argb1555, uint8_t* dst_y,
+ int width) = ARGB1555ToYRow_C;
+#else
+ void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
+ int width) = ARGB1555ToARGBRow_C;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+#endif
+ if (!src_argb1555 || !dst_y || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
+ src_stride_argb1555 = -src_stride_argb1555;
+ }
+
+// Neon version does direct ARGB1555 to YUV.
+#if defined(HAS_ARGB1555TOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
+ ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGB1555ToYRow = ARGB1555ToYRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
+ }
+ }
+ }
+#elif defined(HAS_ARGB1555TOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MSA;
+ ARGB1555ToYRow = ARGB1555ToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB1555ToYRow = ARGB1555ToYRow_MSA;
+ ARGB1555ToUVRow = ARGB1555ToUVRow_MSA;
+ }
+ }
+// Other platforms do intermediate conversion from ARGB1555 to ARGB.
+#else
+#if defined(HAS_ARGB1555TOARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGB1555TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#endif
+ {
+#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA))
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA))
+ ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
+ ARGB1555ToYRow(src_argb1555, dst_y, width);
+ ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
+ width);
+#else
+ ARGB1555ToARGBRow(src_argb1555, row, width);
+ ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
+ width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_argb1555 += src_stride_argb1555 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA))
+ ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
+ ARGB1555ToYRow(src_argb1555, dst_y, width);
+#else
+ ARGB1555ToARGBRow(src_argb1555, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA))
+ free_aligned_buffer_64(row);
+#endif
+ }
+ return 0;
+}
+
+// Convert ARGB4444 to I420.
+LIBYUV_API
+int ARGB4444ToI420(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+#if defined(HAS_ARGB4444TOYROW_NEON)
+ void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGB4444ToUVRow_C;
+ void (*ARGB4444ToYRow)(const uint8_t* src_argb4444, uint8_t* dst_y,
+ int width) = ARGB4444ToYRow_C;
+#else
+ void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
+ int width) = ARGB4444ToARGBRow_C;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+#endif
+ if (!src_argb4444 || !dst_y || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
+ src_stride_argb4444 = -src_stride_argb4444;
+ }
+
+// Neon version does direct ARGB4444 to YUV.
+#if defined(HAS_ARGB4444TOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
+ ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGB4444ToYRow = ARGB4444ToYRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
+ }
+ }
+ }
+// Other platforms do intermediate conversion from ARGB4444 to ARGB.
+#else
+#if defined(HAS_ARGB4444TOARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGB4444TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGB4444TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToUVRow = ARGBToUVRow_Any_MSA;
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_MSA;
+ }
+ }
+ }
+#endif
+#endif
+
+ {
+#if !defined(HAS_ARGB4444TOYROW_NEON)
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_ARGB4444TOYROW_NEON)
+ ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
+ ARGB4444ToYRow(src_argb4444, dst_y, width);
+ ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
+ width);
+#else
+ ARGB4444ToARGBRow(src_argb4444, row, width);
+ ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
+ width);
+ ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+ ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_argb4444 += src_stride_argb4444 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if defined(HAS_ARGB4444TOYROW_NEON)
+ ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
+ ARGB4444ToYRow(src_argb4444, dst_y, width);
+#else
+ ARGB4444ToARGBRow(src_argb4444, row, width);
+ ARGBToUVRow(row, 0, dst_u, dst_v, width);
+ ARGBToYRow(row, dst_y, width);
+#endif
+ }
+#if !defined(HAS_ARGB4444TOYROW_NEON)
+ free_aligned_buffer_64(row);
+#endif
+ }
+ return 0;
+}
+
+static void SplitPixels(const uint8_t* src_u,
+ int src_pixel_stride_uv,
+ uint8_t* dst_u,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ *dst_u = *src_u;
+ ++dst_u;
+ src_u += src_pixel_stride_uv;
+ }
+}
+
+// Convert Android420 to I420.
+LIBYUV_API
+int Android420ToI420(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ int src_pixel_stride_uv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ const ptrdiff_t vu_off = src_v - src_u;
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (halfheight - 1) * src_stride_u;
+ src_v = src_v + (halfheight - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ if (dst_y) {
+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+
+ // Copy UV planes as is - I420
+ if (src_pixel_stride_uv == 1) {
+ CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+ CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+ return 0;
+ // Split UV planes - NV21
+ }
+ if (src_pixel_stride_uv == 2 && vu_off == -1 &&
+ src_stride_u == src_stride_v) {
+ SplitUVPlane(src_v, src_stride_v, dst_v, dst_stride_v, dst_u, dst_stride_u,
+ halfwidth, halfheight);
+ return 0;
+ // Split UV planes - NV12
+ }
+ if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) {
+ SplitUVPlane(src_u, src_stride_u, dst_u, dst_stride_u, dst_v, dst_stride_v,
+ halfwidth, halfheight);
+ return 0;
+ }
+
+ for (y = 0; y < halfheight; ++y) {
+ SplitPixels(src_u, src_pixel_stride_uv, dst_u, halfwidth);
+ SplitPixels(src_v, src_pixel_stride_uv, dst_v, halfwidth);
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ return 0;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_argb.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_argb.cc
new file mode 100644
index 0000000000..f2fe474f70
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_argb.cc
@@ -0,0 +1,2231 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert_argb.h"
+
+#include "libyuv/cpu_id.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "libyuv/planar_functions.h" // For CopyPlane and ARGBShuffle.
+#include "libyuv/rotate_argb.h"
+#include "libyuv/row.h"
+#include "libyuv/video_common.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Copy ARGB with optional flipping
+LIBYUV_API
+int ARGBCopy(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+
+ CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width * 4,
+ height);
+ return 0;
+}
+
+// Convert I420 to ARGB with matrix
+static int I420ToARGBMatrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToARGBRow_C;
+ if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToARGBRow = I422ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToARGBRow = I422ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
+ dst_argb += dst_stride_argb;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
+
+// Convert I420 to ARGB.
+LIBYUV_API
+int I420ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I420 to ABGR.
+LIBYUV_API
+int I420ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_abgr, dst_stride_abgr,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert J420 to ARGB.
+LIBYUV_API
+int J420ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ &kYuvJPEGConstants, width, height);
+}
+
+// Convert J420 to ABGR.
+LIBYUV_API
+int J420ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_abgr, dst_stride_abgr,
+ &kYvuJPEGConstants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert H420 to ARGB.
+LIBYUV_API
+int H420ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ &kYuvH709Constants, width, height);
+}
+
+// Convert H420 to ABGR.
+LIBYUV_API
+int H420ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return I420ToARGBMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_abgr, dst_stride_abgr,
+ &kYvuH709Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert I422 to ARGB with matrix
+static int I422ToARGBMatrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToARGBRow_C;
+ if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && src_stride_u * 2 == width &&
+ src_stride_v * 2 == width && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
+ }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToARGBRow = I422ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToARGBRow = I422ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
+ dst_argb += dst_stride_argb;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ return 0;
+}
+
+// Convert I422 to ARGB.
+LIBYUV_API
+int I422ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I422 to ABGR.
+LIBYUV_API
+int I422ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_abgr, dst_stride_abgr,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert J422 to ARGB.
+LIBYUV_API
+int J422ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ &kYuvJPEGConstants, width, height);
+}
+
+// Convert J422 to ABGR.
+LIBYUV_API
+int J422ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_abgr, dst_stride_abgr,
+ &kYvuJPEGConstants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert H422 to ARGB.
+LIBYUV_API
+int H422ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ &kYuvH709Constants, width, height);
+}
+
+// Convert H422 to ABGR.
+LIBYUV_API
+int H422ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return I422ToARGBMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_abgr, dst_stride_abgr,
+ &kYvuH709Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert 10 bit YUV to ARGB with matrix
+// TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to
+// multiply 10 bit yuv into high bits to allow any number of bits.
+static int I010ToAR30Matrix(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I210ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf,
+ const uint16_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I210ToAR30Row_C;
+ if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
+ dst_stride_ar30 = -dst_stride_ar30;
+ }
+#if defined(HAS_I210TOAR30ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I210ToAR30Row = I210ToAR30Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I210ToAR30Row = I210ToAR30Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I210TOAR30ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I210ToAR30Row = I210ToAR30Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I210ToAR30Row = I210ToAR30Row_AVX2;
+ }
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ I210ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width);
+ dst_ar30 += dst_stride_ar30;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
+
+// Convert I010 to AR30.
+LIBYUV_API
+int I010ToAR30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height) {
+ return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_ar30, dst_stride_ar30,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert H010 to AR30.
+LIBYUV_API
+int H010ToAR30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height) {
+ return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_ar30, dst_stride_ar30,
+ &kYuvH709Constants, width, height);
+}
+
+// Convert I010 to AB30.
+LIBYUV_API
+int I010ToAB30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ab30,
+ int dst_stride_ab30,
+ int width,
+ int height) {
+ return I010ToAR30Matrix(src_y, src_stride_y, src_v, src_stride_v, src_u,
+ src_stride_u, dst_ab30, dst_stride_ab30,
+ &kYvuI601Constants, width, height);
+}
+
+// Convert H010 to AB30.
+LIBYUV_API
+int H010ToAB30(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ab30,
+ int dst_stride_ab30,
+ int width,
+ int height) {
+ return I010ToAR30Matrix(src_y, src_stride_y, src_v, src_stride_v, src_u,
+ src_stride_u, dst_ab30, dst_stride_ab30,
+ &kYvuH709Constants, width, height);
+}
+
+// Convert 10 bit YUV to ARGB with matrix
+static int I010ToARGBMatrix(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I210ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf,
+ const uint16_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I210ToARGBRow_C;
+ if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+#if defined(HAS_I210TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I210ToARGBRow = I210ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I210ToARGBRow = I210ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I210TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I210ToARGBRow = I210ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I210ToARGBRow = I210ToARGBRow_AVX2;
+ }
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
+ dst_argb += dst_stride_argb;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
+
+// Convert I010 to ARGB.
+LIBYUV_API
+int I010ToARGB(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I010 to ABGR.
+LIBYUV_API
+int I010ToABGR(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return I010ToARGBMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_abgr, dst_stride_abgr,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert H010 to ARGB.
+LIBYUV_API
+int H010ToARGB(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ &kYuvH709Constants, width, height);
+}
+
+// Convert H010 to ABGR.
+LIBYUV_API
+int H010ToABGR(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return I010ToARGBMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_abgr, dst_stride_abgr,
+ &kYvuH709Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert I444 to ARGB with matrix
+static int I444ToARGBMatrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I444ToARGBRow_C;
+ if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && src_stride_u == width && src_stride_v == width &&
+ dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
+ }
+#if defined(HAS_I444TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToARGBRow = I444ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I444TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I444ToARGBRow = I444ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I444ToARGBRow = I444ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I444TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I444ToARGBRow = I444ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToARGBRow = I444ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I444TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I444ToARGBRow = I444ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToARGBRow = I444ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
+ dst_argb += dst_stride_argb;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ return 0;
+}
+
+// Convert I444 to ARGB.
+LIBYUV_API
+int I444ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I444 to ABGR.
+LIBYUV_API
+int I444ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return I444ToARGBMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_abgr, dst_stride_abgr,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert J444 to ARGB.
+LIBYUV_API
+int J444ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ &kYuvJPEGConstants, width, height);
+}
+
+// Convert I420 with Alpha to preattenuated ARGB.
+static int I420AlphaToARGBMatrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ const uint8_t* src_a,
+ int src_stride_a,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height,
+ int attenuate) {
+ int y;
+ void (*I422AlphaToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, const uint8_t* a_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) = I422AlphaToARGBRow_C;
+ void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+ int width) = ARGBAttenuateRow_C;
+ if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422ALPHATOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422ALPHATOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422ALPHATOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
+ width);
+ if (attenuate) {
+ ARGBAttenuateRow(dst_argb, dst_argb, width);
+ }
+ dst_argb += dst_stride_argb;
+ src_a += src_stride_a;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
+
+// Convert I420 with Alpha to ARGB.
+LIBYUV_API
+int I420AlphaToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ const uint8_t* src_a,
+ int src_stride_a,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height,
+ int attenuate) {
+ return I420AlphaToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, src_a, src_stride_a, dst_argb,
+ dst_stride_argb, &kYuvI601Constants, width,
+ height, attenuate);
+}
+
+// Convert I420 with Alpha to ABGR.
+LIBYUV_API
+int I420AlphaToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ const uint8_t* src_a,
+ int src_stride_a,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height,
+ int attenuate) {
+ return I420AlphaToARGBMatrix(
+ src_y, src_stride_y, src_v, src_stride_v, // Swap U and V
+ src_u, src_stride_u, src_a, src_stride_a, dst_abgr, dst_stride_abgr,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height, attenuate);
+}
+
+// Convert I400 to ARGB.
+LIBYUV_API
+int I400ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, int width) =
+ I400ToARGBRow_C;
+ if (!src_y || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_y = dst_stride_argb = 0;
+ }
+#if defined(HAS_I400TOARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ I400ToARGBRow = I400ToARGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ I400ToARGBRow = I400ToARGBRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_I400TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I400ToARGBRow = I400ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I400ToARGBRow = I400ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I400TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I400ToARGBRow = I400ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I400ToARGBRow = I400ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I400TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I400ToARGBRow = I400ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ I400ToARGBRow = I400ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I400ToARGBRow(src_y, dst_argb, width);
+ dst_argb += dst_stride_argb;
+ src_y += src_stride_y;
+ }
+ return 0;
+}
+
+// Convert J400 to ARGB.
+LIBYUV_API
+int J400ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*J400ToARGBRow)(const uint8_t* src_y, uint8_t* dst_argb, int width) =
+ J400ToARGBRow_C;
+ if (!src_y || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_stride_y = -src_stride_y;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_y = dst_stride_argb = 0;
+ }
+#if defined(HAS_J400TOARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ J400ToARGBRow = J400ToARGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ J400ToARGBRow = J400ToARGBRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_J400TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ J400ToARGBRow = J400ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ J400ToARGBRow = J400ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_J400TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ J400ToARGBRow = J400ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ J400ToARGBRow = J400ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_J400TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ J400ToARGBRow = J400ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ J400ToARGBRow = J400ToARGBRow_MSA;
+ }
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ J400ToARGBRow(src_y, dst_argb, width);
+ src_y += src_stride_y;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Shuffle table for converting BGRA to ARGB.
+static const uvec8 kShuffleMaskBGRAToARGB = {
+ 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u};
+
+// Shuffle table for converting ABGR to ARGB.
+static const uvec8 kShuffleMaskABGRToARGB = {
+ 2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u};
+
+// Shuffle table for converting RGBA to ARGB.
+static const uvec8 kShuffleMaskRGBAToARGB = {
+ 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u};
+
+// Convert BGRA to ARGB.
+LIBYUV_API
+int BGRAToARGB(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
+ (const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height);
+}
+
+// Convert ARGB to BGRA (same as BGRAToARGB).
+LIBYUV_API
+int ARGBToBGRA(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
+ (const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height);
+}
+
+// Convert ABGR to ARGB.
+LIBYUV_API
+int ABGRToARGB(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb,
+ (const uint8_t*)(&kShuffleMaskABGRToARGB), width, height);
+}
+
+// Convert ARGB to ABGR to (same as ABGRToARGB).
+LIBYUV_API
+int ARGBToABGR(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb,
+ (const uint8_t*)(&kShuffleMaskABGRToARGB), width, height);
+}
+
+// Convert RGBA to ARGB.
+LIBYUV_API
+int RGBAToARGB(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return ARGBShuffle(src_rgba, src_stride_rgba, dst_argb, dst_stride_argb,
+ (const uint8_t*)(&kShuffleMaskRGBAToARGB), width, height);
+}
+
+// Convert RGB24 to ARGB.
+LIBYUV_API
+int RGB24ToARGB(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+ RGB24ToARGBRow_C;
+ if (!src_rgb24 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
+ src_stride_rgb24 = -src_stride_rgb24;
+ }
+ // Coalesce rows.
+ if (src_stride_rgb24 == width * 3 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_rgb24 = dst_stride_argb = 0;
+ }
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_RGB24TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_RGB24TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ RGB24ToARGBRow(src_rgb24, dst_argb, width);
+ src_rgb24 += src_stride_rgb24;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert RAW to ARGB.
+LIBYUV_API
+int RAWToARGB(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+ RAWToARGBRow_C;
+ if (!src_raw || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_raw = src_raw + (height - 1) * src_stride_raw;
+ src_stride_raw = -src_stride_raw;
+ }
+ // Coalesce rows.
+ if (src_stride_raw == width * 3 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_raw = dst_stride_argb = 0;
+ }
+#if defined(HAS_RAWTOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToARGBRow = RAWToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RAWToARGBRow = RAWToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RAWToARGBRow = RAWToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RAWToARGBRow = RAWToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToARGBRow = RAWToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ RAWToARGBRow(src_raw, dst_argb, width);
+ src_raw += src_stride_raw;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert RGB565 to ARGB.
+LIBYUV_API
+int RGB565ToARGB(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*RGB565ToARGBRow)(const uint8_t* src_rgb565, uint8_t* dst_argb,
+ int width) = RGB565ToARGBRow_C;
+ if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
+ src_stride_rgb565 = -src_stride_rgb565;
+ }
+ // Coalesce rows.
+ if (src_stride_rgb565 == width * 2 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_rgb565 = dst_stride_argb = 0;
+ }
+#if defined(HAS_RGB565TOARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_RGB565TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_RGB565TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_RGB565TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RGB565ToARGBRow = RGB565ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ RGB565ToARGBRow(src_rgb565, dst_argb, width);
+ src_rgb565 += src_stride_rgb565;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert ARGB1555 to ARGB.
+LIBYUV_API
+int ARGB1555ToARGB(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGB1555ToARGBRow)(const uint8_t* src_argb1555, uint8_t* dst_argb,
+ int width) = ARGB1555ToARGBRow_C;
+ if (!src_argb1555 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
+ src_stride_argb1555 = -src_stride_argb1555;
+ }
+ // Coalesce rows.
+ if (src_stride_argb1555 == width * 2 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb1555 = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGB1555TOARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGB1555TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGB1555TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGB1555TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB1555ToARGBRow = ARGB1555ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
+ src_argb1555 += src_stride_argb1555;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert ARGB4444 to ARGB.
+LIBYUV_API
+int ARGB4444ToARGB(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGB4444ToARGBRow)(const uint8_t* src_argb4444, uint8_t* dst_argb,
+ int width) = ARGB4444ToARGBRow_C;
+ if (!src_argb4444 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
+ src_stride_argb4444 = -src_stride_argb4444;
+ }
+ // Coalesce rows.
+ if (src_stride_argb4444 == width * 2 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb4444 = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGB4444TOARGBROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGB4444TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGB4444TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGB4444TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGB4444ToARGBRow = ARGB4444ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGB4444ToARGBRow(src_argb4444, dst_argb, width);
+ src_argb4444 += src_stride_argb4444;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert AR30 to ARGB.
+LIBYUV_API
+int AR30ToARGB(const uint8_t* src_ar30,
+ int src_stride_ar30,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ if (!src_ar30 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_ar30 = src_ar30 + (height - 1) * src_stride_ar30;
+ src_stride_ar30 = -src_stride_ar30;
+ }
+ // Coalesce rows.
+ if (src_stride_ar30 == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_ar30 = dst_stride_argb = 0;
+ }
+ for (y = 0; y < height; ++y) {
+ AR30ToARGBRow_C(src_ar30, dst_argb, width);
+ src_ar30 += src_stride_ar30;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert AR30 to ABGR.
+LIBYUV_API
+int AR30ToABGR(const uint8_t* src_ar30,
+ int src_stride_ar30,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ int y;
+ if (!src_ar30 || !dst_abgr || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_ar30 = src_ar30 + (height - 1) * src_stride_ar30;
+ src_stride_ar30 = -src_stride_ar30;
+ }
+ // Coalesce rows.
+ if (src_stride_ar30 == width * 4 && dst_stride_abgr == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_ar30 = dst_stride_abgr = 0;
+ }
+ for (y = 0; y < height; ++y) {
+ AR30ToABGRRow_C(src_ar30, dst_abgr, width);
+ src_ar30 += src_stride_ar30;
+ dst_abgr += dst_stride_abgr;
+ }
+ return 0;
+}
+
+// Convert AR30 to AB30.
+LIBYUV_API
+int AR30ToAB30(const uint8_t* src_ar30,
+ int src_stride_ar30,
+ uint8_t* dst_ab30,
+ int dst_stride_ab30,
+ int width,
+ int height) {
+ int y;
+ if (!src_ar30 || !dst_ab30 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_ar30 = src_ar30 + (height - 1) * src_stride_ar30;
+ src_stride_ar30 = -src_stride_ar30;
+ }
+ // Coalesce rows.
+ if (src_stride_ar30 == width * 4 && dst_stride_ab30 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_ar30 = dst_stride_ab30 = 0;
+ }
+ for (y = 0; y < height; ++y) {
+ AR30ToAB30Row_C(src_ar30, dst_ab30, width);
+ src_ar30 += src_stride_ar30;
+ dst_ab30 += dst_stride_ab30;
+ }
+ return 0;
+}
+
+// Convert NV12 to ARGB with matrix
+static int NV12ToARGBMatrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*NV12ToARGBRow)(
+ const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C;
+ if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+#if defined(HAS_NV12TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_NV12TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ NV12ToARGBRow = NV12ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_NV12TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ NV12ToARGBRow = NV12ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_NV12TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ NV12ToARGBRow = NV12ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ NV12ToARGBRow = NV12ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
+ dst_argb += dst_stride_argb;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_uv += src_stride_uv;
+ }
+ }
+ return 0;
+}
+
+// Convert NV21 to ARGB with matrix
+static int NV21ToARGBMatrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*NV21ToARGBRow)(
+ const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C;
+ if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+#if defined(HAS_NV21TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ NV21ToARGBRow = NV21ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_NV21TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ NV21ToARGBRow = NV21ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ NV21ToARGBRow = NV21ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_NV21TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ NV21ToARGBRow = NV21ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_NV21TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ NV21ToARGBRow = NV21ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ NV21ToARGBRow = NV21ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width);
+ dst_argb += dst_stride_argb;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_vu += src_stride_vu;
+ }
+ }
+ return 0;
+}
+
+// Convert NV12 to ARGB.
+LIBYUV_API
+int NV12ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return NV12ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb,
+ dst_stride_argb, &kYuvI601Constants, width, height);
+}
+
+// Convert NV21 to ARGB.
+LIBYUV_API
+int NV21ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return NV21ToARGBMatrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_argb,
+ dst_stride_argb, &kYuvI601Constants, width, height);
+}
+
+// Convert NV12 to ABGR.
+// To output ABGR instead of ARGB swap the UV and use a mirrrored yuc matrix.
+// To swap the UV use NV12 instead of NV21.LIBYUV_API
+int NV12ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return NV21ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_abgr,
+ dst_stride_abgr, &kYvuI601Constants, width, height);
+}
+
+// Convert NV21 to ABGR.
+LIBYUV_API
+int NV21ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return NV12ToARGBMatrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_abgr,
+ dst_stride_abgr, &kYvuI601Constants, width, height);
+}
+
+// TODO(fbarchard): Consider SSSE3 2 step conversion.
+// Convert NV12 to RGB24 with matrix
+static int NV12ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*NV12ToRGB24Row)(
+ const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C;
+ if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+#if defined(HAS_NV12TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ NV12ToRGB24Row = NV12ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ NV12ToRGB24Row = NV12ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_NV12TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ NV12ToRGB24Row = NV12ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ NV12ToRGB24Row = NV12ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_NV12TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ NV12ToRGB24Row = NV12ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ NV12ToRGB24Row = NV12ToRGB24Row_AVX2;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_uv += src_stride_uv;
+ }
+ }
+ return 0;
+}
+
+// Convert NV21 to RGB24 with matrix
+static int NV21ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*NV21ToRGB24Row)(
+ const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C;
+ if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+#if defined(HAS_NV21TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ NV21ToRGB24Row = NV21ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ NV21ToRGB24Row = NV21ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_NV21TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ NV21ToRGB24Row = NV21ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ NV21ToRGB24Row = NV21ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_NV21TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ NV21ToRGB24Row = NV21ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ NV21ToRGB24Row = NV21ToRGB24Row_AVX2;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_vu += src_stride_vu;
+ }
+ }
+ return 0;
+}
+
+// TODO(fbarchard): NV12ToRAW can be implemented by mirrored matrix.
+// Convert NV12 to RGB24.
+LIBYUV_API
+int NV12ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ return NV12ToRGB24Matrix(src_y, src_stride_y, src_uv, src_stride_uv,
+ dst_rgb24, dst_stride_rgb24, &kYuvI601Constants,
+ width, height);
+}
+
+// Convert NV21 to RGB24.
+LIBYUV_API
+int NV21ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ return NV21ToRGB24Matrix(src_y, src_stride_y, src_vu, src_stride_vu,
+ dst_rgb24, dst_stride_rgb24, &kYuvI601Constants,
+ width, height);
+}
+
+// Convert M420 to ARGB.
+LIBYUV_API
+int M420ToARGB(const uint8_t* src_m420,
+ int src_stride_m420,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*NV12ToARGBRow)(
+ const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C;
+ if (!src_m420 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+#if defined(HAS_NV12TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_NV12TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ NV12ToARGBRow = NV12ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_NV12TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ NV12ToARGBRow = NV12ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_NV12TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ NV12ToARGBRow = NV12ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ NV12ToARGBRow = NV12ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
+ &kYuvI601Constants, width);
+ NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
+ dst_argb + dst_stride_argb, &kYuvI601Constants, width);
+ dst_argb += dst_stride_argb * 2;
+ src_m420 += src_stride_m420 * 3;
+ }
+ if (height & 1) {
+ NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
+ &kYuvI601Constants, width);
+ }
+ return 0;
+}
+
+// Convert YUY2 to ARGB.
+LIBYUV_API
+int YUY2ToARGB(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*YUY2ToARGBRow)(const uint8_t* src_yuy2, uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants, int width) =
+ YUY2ToARGBRow_C;
+ if (!src_yuy2 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+ src_stride_yuy2 = -src_stride_yuy2;
+ }
+ // Coalesce rows.
+ if (src_stride_yuy2 == width * 2 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_yuy2 = dst_stride_argb = 0;
+ }
+#if defined(HAS_YUY2TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ YUY2ToARGBRow = YUY2ToARGBRow_MSA;
+ }
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvI601Constants, width);
+ src_yuy2 += src_stride_yuy2;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert UYVY to ARGB.
+LIBYUV_API
+int UYVYToARGB(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*UYVYToARGBRow)(const uint8_t* src_uyvy, uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants, int width) =
+ UYVYToARGBRow_C;
+ if (!src_uyvy || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
+ src_stride_uyvy = -src_stride_uyvy;
+ }
+ // Coalesce rows.
+ if (src_stride_uyvy == width * 2 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_uyvy = dst_stride_argb = 0;
+ }
+#if defined(HAS_UYVYTOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToARGBRow = UYVYToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_UYVYTOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ UYVYToARGBRow = UYVYToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ UYVYToARGBRow = UYVYToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_UYVYTOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ UYVYToARGBRow = UYVYToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_UYVYTOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ UYVYToARGBRow = UYVYToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ UYVYToARGBRow = UYVYToARGBRow_MSA;
+ }
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ UYVYToARGBRow(src_uyvy, dst_argb, &kYuvI601Constants, width);
+ src_uyvy += src_stride_uyvy;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+static void WeavePixels(const uint8_t* src_u,
+ const uint8_t* src_v,
+ int src_pixel_stride_uv,
+ uint8_t* dst_uv,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst_uv[0] = *src_u;
+ dst_uv[1] = *src_v;
+ dst_uv += 2;
+ src_u += src_pixel_stride_uv;
+ src_v += src_pixel_stride_uv;
+ }
+}
+
+// Convert Android420 to ARGB.
+LIBYUV_API
+int Android420ToARGBMatrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ int src_pixel_stride_uv,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ uint8_t* dst_uv;
+ const ptrdiff_t vu_off = src_v - src_u;
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+
+ // I420
+ if (src_pixel_stride_uv == 1) {
+ return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_argb, dst_stride_argb,
+ yuvconstants, width, height);
+ // NV21
+ }
+ if (src_pixel_stride_uv == 2 && vu_off == -1 &&
+ src_stride_u == src_stride_v) {
+ return NV21ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, dst_argb,
+ dst_stride_argb, yuvconstants, width, height);
+ // NV12
+ }
+ if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) {
+ return NV12ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, dst_argb,
+ dst_stride_argb, yuvconstants, width, height);
+ }
+
+ // General case fallback creates NV12
+ align_buffer_64(plane_uv, halfwidth * 2 * halfheight);
+ dst_uv = plane_uv;
+ for (y = 0; y < halfheight; ++y) {
+ WeavePixels(src_u, src_v, src_pixel_stride_uv, dst_uv, halfwidth);
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ dst_uv += halfwidth * 2;
+ }
+ NV12ToARGBMatrix(src_y, src_stride_y, plane_uv, halfwidth * 2, dst_argb,
+ dst_stride_argb, yuvconstants, width, height);
+ free_aligned_buffer_64(plane_uv);
+ return 0;
+}
+
+// Convert Android420 to ARGB.
+LIBYUV_API
+int Android420ToARGB(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ int src_pixel_stride_uv,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return Android420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, src_pixel_stride_uv, dst_argb,
+ dst_stride_argb, &kYuvI601Constants, width,
+ height);
+}
+
+// Convert Android420 to ABGR.
+LIBYUV_API
+int Android420ToABGR(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ int src_pixel_stride_uv,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ return Android420ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, src_u,
+ src_stride_u, src_pixel_stride_uv, dst_abgr,
+ dst_stride_abgr, &kYvuI601Constants, width,
+ height);
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_from.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_from.cc
new file mode 100644
index 0000000000..6fa253237e
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_from.cc
@@ -0,0 +1,1429 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert_from.h"
+
+#include "libyuv/basic_types.h"
+#include "libyuv/convert.h" // For I420Copy
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+#include "libyuv/row.h"
+#include "libyuv/scale.h" // For ScalePlane()
+#include "libyuv/video_common.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
+static __inline int Abs(int v) {
+ return v >= 0 ? v : -v;
+}
+
+// I420 To any I4xx YUV format with mirroring.
+static int I420ToI4xx(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int src_y_width,
+ int src_y_height,
+ int dst_uv_width,
+ int dst_uv_height) {
+ const int dst_y_width = Abs(src_y_width);
+ const int dst_y_height = Abs(src_y_height);
+ const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1);
+ const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1);
+ if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 ||
+ dst_uv_height <= 0) {
+ return -1;
+ }
+ if (dst_y) {
+ ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
+ dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+ }
+ ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+ dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+ ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+ dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+ return 0;
+}
+
+// Convert 8 bit YUV to 10 bit.
+LIBYUV_API
+int I420ToI010(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (halfheight - 1) * src_stride_u;
+ src_v = src_v + (halfheight - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ // Convert Y plane.
+ Convert8To16Plane(src_y, src_stride_y, dst_y, dst_stride_y, 1024, width,
+ height);
+ // Convert UV planes.
+ Convert8To16Plane(src_u, src_stride_u, dst_u, dst_stride_u, 1024, halfwidth,
+ halfheight);
+ Convert8To16Plane(src_v, src_stride_v, dst_v, dst_stride_v, 1024, halfwidth,
+ halfheight);
+ return 0;
+}
+
+// 420 chroma is 1/2 width, 1/2 height
+// 422 chroma is 1/2 width, 1x height
+LIBYUV_API
+int I420ToI422(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ const int dst_uv_width = (Abs(width) + 1) >> 1;
+ const int dst_uv_height = Abs(height);
+ return I420ToI4xx(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, dst_uv_width,
+ dst_uv_height);
+}
+
+// 420 chroma is 1/2 width, 1/2 height
+// 444 chroma is 1x width, 1x height
+LIBYUV_API
+int I420ToI444(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ const int dst_uv_width = Abs(width);
+ const int dst_uv_height = Abs(height);
+ return I420ToI4xx(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, dst_uv_width,
+ dst_uv_height);
+}
+
+// Copy to I400. Source can be I420,422,444,400,NV12,NV21
+LIBYUV_API
+int I400Copy(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ if (!src_y || !dst_y || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_stride_y = -src_stride_y;
+ }
+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ return 0;
+}
+
+LIBYUV_API
+int I422ToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_yuy2, int width) =
+ I422ToYUY2Row_C;
+ if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
+ dst_stride_yuy2 = -dst_stride_yuy2;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && src_stride_u * 2 == width &&
+ src_stride_v * 2 == width && dst_stride_yuy2 == width * 2) {
+ width *= height;
+ height = 1;
+ src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
+ }
+#if defined(HAS_I422TOYUY2ROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOYUY2ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToYUY2Row = I422ToYUY2Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOYUY2ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ dst_yuy2 += dst_stride_yuy2;
+ }
+ return 0;
+}
+
+LIBYUV_API
+int I420ToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_yuy2, int width) =
+ I422ToYUY2Row_C;
+ if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
+ dst_stride_yuy2 = -dst_stride_yuy2;
+ }
+#if defined(HAS_I422TOYUY2ROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOYUY2ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToYUY2Row = I422ToYUY2Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOYUY2ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOYUY2ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToYUY2Row = I422ToYUY2Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
+ I422ToYUY2Row(src_y + src_stride_y, src_u, src_v,
+ dst_yuy2 + dst_stride_yuy2, width);
+ src_y += src_stride_y * 2;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ dst_yuy2 += dst_stride_yuy2 * 2;
+ }
+ if (height & 1) {
+ I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
+ }
+ return 0;
+}
+
+LIBYUV_API
+int I422ToUYVY(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_uyvy,
+ int dst_stride_uyvy,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_uyvy, int width) =
+ I422ToUYVYRow_C;
+ if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
+ dst_stride_uyvy = -dst_stride_uyvy;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && src_stride_u * 2 == width &&
+ src_stride_v * 2 == width && dst_stride_uyvy == width * 2) {
+ width *= height;
+ height = 1;
+ src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
+ }
+#if defined(HAS_I422TOUYVYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOUYVYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToUYVYRow = I422ToUYVYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOUYVYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOUYVYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToUYVYRow = I422ToUYVYRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ dst_uyvy += dst_stride_uyvy;
+ }
+ return 0;
+}
+
+LIBYUV_API
+int I420ToUYVY(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_uyvy,
+ int dst_stride_uyvy,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_uyvy, int width) =
+ I422ToUYVYRow_C;
+ if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
+ dst_stride_uyvy = -dst_stride_uyvy;
+ }
+#if defined(HAS_I422TOUYVYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOUYVYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToUYVYRow = I422ToUYVYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOUYVYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOUYVYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToUYVYRow = I422ToUYVYRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
+ I422ToUYVYRow(src_y + src_stride_y, src_u, src_v,
+ dst_uyvy + dst_stride_uyvy, width);
+ src_y += src_stride_y * 2;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ dst_uyvy += dst_stride_uyvy * 2;
+ }
+ if (height & 1) {
+ I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
+ }
+ return 0;
+}
+
+// TODO(fbarchard): test negative height for invert.
+LIBYUV_API
+int I420ToNV12(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ int halfwidth = (width + 1) / 2;
+ int halfheight = height > 0 ? (height + 1) / 2 : (height - 1) / 2;
+ if (dst_y) {
+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+ MergeUVPlane(src_u, src_stride_u, src_v, src_stride_v, dst_uv, dst_stride_uv,
+ halfwidth, halfheight);
+ return 0;
+}
+
+LIBYUV_API
+int I420ToNV21(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_vu,
+ int dst_stride_vu,
+ int width,
+ int height) {
+ return I420ToNV12(src_y, src_stride_y, src_v, src_stride_v, src_u,
+ src_stride_u, dst_y, dst_stride_y, dst_vu, dst_stride_vu,
+ width, height);
+}
+
+// Convert I422 to RGBA with matrix
+static int I420ToRGBAMatrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgba,
+ int dst_stride_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToRGBARow_C;
+ if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
+ dst_stride_rgba = -dst_stride_rgba;
+ }
+#if defined(HAS_I422TORGBAROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGBARow = I422ToRGBARow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGBAROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGBARow = I422ToRGBARow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGBARow = I422ToRGBARow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGBAROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToRGBARow = I422ToRGBARow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGBARow = I422ToRGBARow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGBAROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToRGBARow = I422ToRGBARow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGBARow = I422ToRGBARow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
+ dst_rgba += dst_stride_rgba;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
+
+// Convert I420 to RGBA.
+LIBYUV_API
+int I420ToRGBA(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgba,
+ int dst_stride_rgba,
+ int width,
+ int height) {
+ return I420ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgba, dst_stride_rgba,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I420 to BGRA.
+LIBYUV_API
+int I420ToBGRA(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_bgra,
+ int dst_stride_bgra,
+ int width,
+ int height) {
+ return I420ToRGBAMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_bgra, dst_stride_bgra,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert I420 to RGB24 with matrix
+static int I420ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToRGB24Row_C;
+ if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+#if defined(HAS_I422TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGB24Row = I422ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGB24Row = I422ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
+
+// Convert I420 to RGB24.
+LIBYUV_API
+int I420ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I420 to RAW.
+LIBYUV_API
+int I420ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height) {
+ return I420ToRGB24Matrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_raw, dst_stride_raw,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert H420 to RGB24.
+LIBYUV_API
+int H420ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ &kYuvH709Constants, width, height);
+}
+
+// Convert H420 to RAW.
+LIBYUV_API
+int H420ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height) {
+ return I420ToRGB24Matrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_raw, dst_stride_raw,
+ &kYvuH709Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert I420 to ARGB1555.
+LIBYUV_API
+int I420ToARGB1555(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb1555,
+ int dst_stride_argb1555,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToARGB1555Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) = I422ToARGB1555Row_C;
+ if (!src_y || !src_u || !src_v || !dst_argb1555 || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb1555 = dst_argb1555 + (height - 1) * dst_stride_argb1555;
+ dst_stride_argb1555 = -dst_stride_argb1555;
+ }
+#if defined(HAS_I422TOARGB1555ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGB1555ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGB1555ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGB1555ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants,
+ width);
+ dst_argb1555 += dst_stride_argb1555;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
+
+// Convert I420 to ARGB4444.
+LIBYUV_API
+int I420ToARGB4444(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_argb4444,
+ int dst_stride_argb4444,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToARGB4444Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) = I422ToARGB4444Row_C;
+ if (!src_y || !src_u || !src_v || !dst_argb4444 || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb4444 = dst_argb4444 + (height - 1) * dst_stride_argb4444;
+ dst_stride_argb4444 = -dst_stride_argb4444;
+ }
+#if defined(HAS_I422TOARGB4444ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGB4444ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGB4444ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGB4444ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants,
+ width);
+ dst_argb4444 += dst_stride_argb4444;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
+
+// Convert I420 to RGB565.
+LIBYUV_API
+int I420ToRGB565(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToRGB565Row_C;
+ if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+ dst_stride_rgb565 = -dst_stride_rgb565;
+ }
+#if defined(HAS_I422TORGB565ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGB565Row = I422ToRGB565Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB565ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB565Row = I422ToRGB565Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB565ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGB565Row = I422ToRGB565Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB565ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGB565Row = I422ToRGB565Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width);
+ dst_rgb565 += dst_stride_rgb565;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
+
+// Convert I422 to RGB565.
+LIBYUV_API
+int I422ToRGB565(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToRGB565Row_C;
+ if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+ dst_stride_rgb565 = -dst_stride_rgb565;
+ }
+#if defined(HAS_I422TORGB565ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGB565Row = I422ToRGB565Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB565ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB565Row = I422ToRGB565Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB565ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGB565Row = I422ToRGB565Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB565ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGB565Row = I422ToRGB565Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width);
+ dst_rgb565 += dst_stride_rgb565;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ return 0;
+}
+
+// Ordered 8x8 dither for 888 to 565. Values from 0 to 7.
+static const uint8_t kDither565_4x4[16] = {
+ 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
+};
+
+// Convert I420 to RGB565 with dithering.
+LIBYUV_API
+int I420ToRGB565Dither(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ const uint8_t* dither4x4,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToARGBRow_C;
+ void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb,
+ const uint32_t dither4, int width) =
+ ARGBToRGB565DitherRow_C;
+ if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+ dst_stride_rgb565 = -dst_stride_rgb565;
+ }
+ if (!dither4x4) {
+ dither4x4 = kDither565_4x4;
+ }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToARGBRow = I422ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToARGBRow = I422ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MSA;
+ }
+ }
+#endif
+ {
+ // Allocate a row of argb.
+ align_buffer_64(row_argb, width * 4);
+ for (y = 0; y < height; ++y) {
+ I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
+ ARGBToRGB565DitherRow(row_argb, dst_rgb565,
+ *(const uint32_t*)(dither4x4 + ((y & 3) << 2)),
+ width);
+ dst_rgb565 += dst_stride_rgb565;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ free_aligned_buffer_64(row_argb);
+ }
+ return 0;
+}
+
+// Convert I420 to AR30 with matrix
+static int I420ToAR30Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToAR30Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToAR30Row_C;
+
+ if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
+ dst_stride_ar30 = -dst_stride_ar30;
+ }
+
+#if defined(HAS_I422TOAR30ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToAR30Row = I422ToAR30Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToAR30Row = I422ToAR30Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TOAR30ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToAR30Row = I422ToAR30Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToAR30Row = I422ToAR30Row_AVX2;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width);
+ dst_ar30 += dst_stride_ar30;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ }
+ return 0;
+}
+
+// Convert I420 to AR30.
+LIBYUV_API
+int I420ToAR30(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height) {
+ return I420ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_ar30, dst_stride_ar30,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert H420 to AR30.
+LIBYUV_API
+int H420ToAR30(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height) {
+ return I420ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_ar30, dst_stride_ar30,
+ &kYvuH709Constants, width, height);
+}
+
+// Convert I420 to specified format
+LIBYUV_API
+int ConvertFromI420(const uint8_t* y,
+ int y_stride,
+ const uint8_t* u,
+ int u_stride,
+ const uint8_t* v,
+ int v_stride,
+ uint8_t* dst_sample,
+ int dst_sample_stride,
+ int width,
+ int height,
+ uint32_t fourcc) {
+ uint32_t format = CanonicalFourCC(fourcc);
+ int r = 0;
+ if (!y || !u || !v || !dst_sample || width <= 0 || height == 0) {
+ return -1;
+ }
+ switch (format) {
+ // Single plane formats
+ case FOURCC_YUY2:
+ r = I420ToYUY2(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 2, width,
+ height);
+ break;
+ case FOURCC_UYVY:
+ r = I420ToUYVY(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 2, width,
+ height);
+ break;
+ case FOURCC_RGBP:
+ r = I420ToRGB565(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 2, width,
+ height);
+ break;
+ case FOURCC_RGBO:
+ r = I420ToARGB1555(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 2,
+ width, height);
+ break;
+ case FOURCC_R444:
+ r = I420ToARGB4444(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 2,
+ width, height);
+ break;
+ case FOURCC_24BG:
+ r = I420ToRGB24(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 3, width,
+ height);
+ break;
+ case FOURCC_RAW:
+ r = I420ToRAW(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 3, width,
+ height);
+ break;
+ case FOURCC_ARGB:
+ r = I420ToARGB(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 4, width,
+ height);
+ break;
+ case FOURCC_BGRA:
+ r = I420ToBGRA(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 4, width,
+ height);
+ break;
+ case FOURCC_ABGR:
+ r = I420ToABGR(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 4, width,
+ height);
+ break;
+ case FOURCC_RGBA:
+ r = I420ToRGBA(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 4, width,
+ height);
+ break;
+ case FOURCC_AR30:
+ r = I420ToAR30(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width * 4, width,
+ height);
+ break;
+ case FOURCC_I400:
+ r = I400Copy(y, y_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width, width,
+ height);
+ break;
+ case FOURCC_NV12: {
+ uint8_t* dst_uv = dst_sample + width * height;
+ r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width, dst_uv,
+ dst_sample_stride ? dst_sample_stride : width, width,
+ height);
+ break;
+ }
+ case FOURCC_NV21: {
+ uint8_t* dst_vu = dst_sample + width * height;
+ r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride ? dst_sample_stride : width, dst_vu,
+ dst_sample_stride ? dst_sample_stride : width, width,
+ height);
+ break;
+ }
+ // TODO(fbarchard): Add M420.
+ // Triplanar formats
+ case FOURCC_I420:
+ case FOURCC_YV12: {
+ dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
+ int halfstride = (dst_sample_stride + 1) / 2;
+ int halfheight = (height + 1) / 2;
+ uint8_t* dst_u;
+ uint8_t* dst_v;
+ if (format == FOURCC_YV12) {
+ dst_v = dst_sample + dst_sample_stride * height;
+ dst_u = dst_v + halfstride * halfheight;
+ } else {
+ dst_u = dst_sample + dst_sample_stride * height;
+ dst_v = dst_u + halfstride * halfheight;
+ }
+ r = I420Copy(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
+ width, height);
+ break;
+ }
+ case FOURCC_I422:
+ case FOURCC_YV16: {
+ dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
+ int halfstride = (dst_sample_stride + 1) / 2;
+ uint8_t* dst_u;
+ uint8_t* dst_v;
+ if (format == FOURCC_YV16) {
+ dst_v = dst_sample + dst_sample_stride * height;
+ dst_u = dst_v + halfstride * height;
+ } else {
+ dst_u = dst_sample + dst_sample_stride * height;
+ dst_v = dst_u + halfstride * height;
+ }
+ r = I420ToI422(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
+ width, height);
+ break;
+ }
+ case FOURCC_I444:
+ case FOURCC_YV24: {
+ dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
+ uint8_t* dst_u;
+ uint8_t* dst_v;
+ if (format == FOURCC_YV24) {
+ dst_v = dst_sample + dst_sample_stride * height;
+ dst_u = dst_v + dst_sample_stride * height;
+ } else {
+ dst_u = dst_sample + dst_sample_stride * height;
+ dst_v = dst_u + dst_sample_stride * height;
+ }
+ r = I420ToI444(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+ dst_sample_stride, dst_u, dst_sample_stride, dst_v,
+ dst_sample_stride, width, height);
+ break;
+ }
+ // Formats not supported - MJPG, biplanar, some rgb formats.
+ default:
+ return -1; // unknown fourcc - return failure code.
+ }
+ return r;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_from_argb.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_from_argb.cc
new file mode 100644
index 0000000000..c8d91252e9
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_from_argb.cc
@@ -0,0 +1,1617 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert_from_argb.h"
+
+#include "libyuv/basic_types.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// ARGB little endian (bgra in memory) to I444
+LIBYUV_API
+int ARGBToI444(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+ void (*ARGBToUV444Row)(const uint8_t* src_argb, uint8_t* dst_u,
+ uint8_t* dst_v, int width) = ARGBToUV444Row_C;
+ if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_y == width &&
+ dst_stride_u == width && dst_stride_v == width) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+ }
+#if defined(HAS_ARGBTOUV444ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUV444Row = ARGBToUV444Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUV444ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToUV444Row = ARGBToUV444Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUV444ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToUV444Row = ARGBToUV444Row_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUV444Row = ARGBToUV444Row_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToUV444Row(src_argb, dst_u, dst_v, width);
+ ARGBToYRow(src_argb, dst_y, width);
+ src_argb += src_stride_argb;
+ dst_y += dst_stride_y;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ return 0;
+}
+
+// ARGB little endian (bgra in memory) to I422
+LIBYUV_API
+int ARGBToI422(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+ if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_y == width &&
+ dst_stride_u * 2 == width && dst_stride_v * 2 == width) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+ }
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+#endif
+
+#if defined(HAS_ARGBTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToUVRow = ARGBToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
+ ARGBToYRow(src_argb, dst_y, width);
+ src_argb += src_stride_argb;
+ dst_y += dst_stride_y;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ return 0;
+}
+
+LIBYUV_API
+int ARGBToNV12(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ int y;
+ int halfwidth = (width + 1) >> 1;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+ void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
+ uint8_t* dst_uv, int width) = MergeUVRow_C;
+ if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToUVRow = ARGBToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ MergeUVRow_ = MergeUVRow_Any_SSE2;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ MergeUVRow_ = MergeUVRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX2;
+ if (IS_ALIGNED(halfwidth, 32)) {
+ MergeUVRow_ = MergeUVRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeUVRow_ = MergeUVRow_Any_NEON;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ MergeUVRow_ = MergeUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ MergeUVRow_ = MergeUVRow_Any_MSA;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ MergeUVRow_ = MergeUVRow_MSA;
+ }
+ }
+#endif
+ {
+ // Allocate a rows of uv.
+ align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
+ uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+
+ for (y = 0; y < height - 1; y += 2) {
+ ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
+ MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+ src_argb += src_stride_argb * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ ARGBToUVRow(src_argb, 0, row_u, row_v, width);
+ MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ }
+ free_aligned_buffer_64(row_u);
+ }
+ return 0;
+}
+
+// Same as NV12 but U and V swapped.
+LIBYUV_API
+int ARGBToNV21(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_vu,
+ int dst_stride_vu,
+ int width,
+ int height) {
+ int y;
+ int halfwidth = (width + 1) >> 1;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+ void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
+ uint8_t* dst_vu, int width) = MergeUVRow_C;
+ if (!src_argb || !dst_y || !dst_vu || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToUVRow = ARGBToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ MergeUVRow_ = MergeUVRow_Any_SSE2;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ MergeUVRow_ = MergeUVRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX2;
+ if (IS_ALIGNED(halfwidth, 32)) {
+ MergeUVRow_ = MergeUVRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeUVRow_ = MergeUVRow_Any_NEON;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ MergeUVRow_ = MergeUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ MergeUVRow_ = MergeUVRow_Any_MSA;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ MergeUVRow_ = MergeUVRow_MSA;
+ }
+ }
+#endif
+ {
+ // Allocate a rows of uv.
+ align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
+ uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+
+ for (y = 0; y < height - 1; y += 2) {
+ ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
+ MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+ src_argb += src_stride_argb * 2;
+ dst_y += dst_stride_y * 2;
+ dst_vu += dst_stride_vu;
+ }
+ if (height & 1) {
+ ARGBToUVRow(src_argb, 0, row_u, row_v, width);
+ MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+ ARGBToYRow(src_argb, dst_y, width);
+ }
+ free_aligned_buffer_64(row_u);
+ }
+ return 0;
+}
+
+// Convert ARGB to YUY2.
+LIBYUV_API
+int ARGBToYUY2(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+ void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_yuy2, int width) =
+ I422ToYUY2Row_C;
+
+ if (!src_argb || !dst_yuy2 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
+ dst_stride_yuy2 = -dst_stride_yuy2;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_yuy2 == width * 2) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_yuy2 = 0;
+ }
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToUVRow = ARGBToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_I422TOYUY2ROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOYUY2ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToYUY2Row = I422ToYUY2Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOYUY2ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOYUY2ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToYUY2Row = I422ToYUY2Row_MSA;
+ }
+ }
+#endif
+
+ {
+ // Allocate a rows of yuv.
+ align_buffer_64(row_y, ((width + 63) & ~63) * 2);
+ uint8_t* row_u = row_y + ((width + 63) & ~63);
+ uint8_t* row_v = row_u + ((width + 63) & ~63) / 2;
+
+ for (y = 0; y < height; ++y) {
+ ARGBToUVRow(src_argb, 0, row_u, row_v, width);
+ ARGBToYRow(src_argb, row_y, width);
+ I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width);
+ src_argb += src_stride_argb;
+ dst_yuy2 += dst_stride_yuy2;
+ }
+
+ free_aligned_buffer_64(row_y);
+ }
+ return 0;
+}
+
+// Convert ARGB to UYVY.
+LIBYUV_API
+int ARGBToUYVY(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_uyvy,
+ int dst_stride_uyvy,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+ void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
+ const uint8_t* src_v, uint8_t* dst_uyvy, int width) =
+ I422ToUYVYRow_C;
+
+ if (!src_argb || !dst_uyvy || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
+ dst_stride_uyvy = -dst_stride_uyvy;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_uyvy == width * 2) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_uyvy = 0;
+ }
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToUVRow = ARGBToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_I422TOUYVYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOUYVYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToUYVYRow = I422ToUYVYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOUYVYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOUYVYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToUYVYRow = I422ToUYVYRow_MSA;
+ }
+ }
+#endif
+
+ {
+ // Allocate a rows of yuv.
+ align_buffer_64(row_y, ((width + 63) & ~63) * 2);
+ uint8_t* row_u = row_y + ((width + 63) & ~63);
+ uint8_t* row_v = row_u + ((width + 63) & ~63) / 2;
+
+ for (y = 0; y < height; ++y) {
+ ARGBToUVRow(src_argb, 0, row_u, row_v, width);
+ ARGBToYRow(src_argb, row_y, width);
+ I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width);
+ src_argb += src_stride_argb;
+ dst_uyvy += dst_stride_uyvy;
+ }
+
+ free_aligned_buffer_64(row_y);
+ }
+ return 0;
+}
+
+// Convert ARGB to I400.
+LIBYUV_API
+int ARGBToI400(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+ if (!src_argb || !dst_y || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_y = 0;
+ }
+#if defined(HAS_ARGBTOYROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToYRow(src_argb, dst_y, width);
+ src_argb += src_stride_argb;
+ dst_y += dst_stride_y;
+ }
+ return 0;
+}
+
+// Shuffle table for converting ARGB to RGBA.
+static const uvec8 kShuffleMaskARGBToRGBA = {
+ 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u};
+
+// Convert ARGB to RGBA.
+LIBYUV_API
+int ARGBToRGBA(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_rgba,
+ int dst_stride_rgba,
+ int width,
+ int height) {
+ return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba,
+ (const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height);
+}
+
+// Convert ARGB To RGB24.
+LIBYUV_API
+int ARGBToRGB24(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) =
+ ARGBToRGB24Row_C;
+ if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_rgb24 == width * 3) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_rgb24 = 0;
+ }
+#if defined(HAS_ARGBTORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
+ if (TestCpuFlag(kCpuHasAVX512VBMI)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB24ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToRGB24Row(src_argb, dst_rgb24, width);
+ src_argb += src_stride_argb;
+ dst_rgb24 += dst_stride_rgb24;
+ }
+ return 0;
+}
+
+// Convert ARGB To RAW.
+LIBYUV_API
+int ARGBToRAW(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToRAWRow)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) =
+ ARGBToRAWRow_C;
+ if (!src_argb || !dst_raw || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_raw == width * 3) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_raw = 0;
+ }
+#if defined(HAS_ARGBTORAWROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToRAWRow = ARGBToRAWRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORAWROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToRAWRow = ARGBToRAWRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToRAWRow = ARGBToRAWRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORAWROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRAWRow = ARGBToRAWRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORAWROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToRAWRow = ARGBToRAWRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToRAWRow = ARGBToRAWRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToRAWRow(src_argb, dst_raw, width);
+ src_argb += src_stride_argb;
+ dst_raw += dst_stride_raw;
+ }
+ return 0;
+}
+
+// Ordered 8x8 dither for 888 to 565. Values from 0 to 7.
+static const uint8_t kDither565_4x4[16] = {
+ 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
+};
+
+// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
+LIBYUV_API
+int ARGBToRGB565Dither(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ const uint8_t* dither4x4,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb,
+ const uint32_t dither4, int width) =
+ ARGBToRGB565DitherRow_C;
+ if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ if (!dither4x4) {
+ dither4x4 = kDither565_4x4;
+ }
+#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToRGB565DitherRow(src_argb, dst_rgb565,
+ *(const uint32_t*)(dither4x4 + ((y & 3) << 2)),
+ width);
+ src_argb += src_stride_argb;
+ dst_rgb565 += dst_stride_rgb565;
+ }
+ return 0;
+}
+
+// Convert ARGB To RGB565.
+// TODO(fbarchard): Consider using dither function low level with zeros.
+LIBYUV_API
+int ARGBToRGB565(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToRGB565Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
+ int width) = ARGBToRGB565Row_C;
+ if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_rgb565 == width * 2) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_rgb565 = 0;
+ }
+#if defined(HAS_ARGBTORGB565ROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTORGB565ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToRGB565Row(src_argb, dst_rgb565, width);
+ src_argb += src_stride_argb;
+ dst_rgb565 += dst_stride_rgb565;
+ }
+ return 0;
+}
+
+// Convert ARGB To ARGB1555.
+LIBYUV_API
+int ARGBToARGB1555(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb1555,
+ int dst_stride_argb1555,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToARGB1555Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
+ int width) = ARGBToARGB1555Row_C;
+ if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_argb1555 == width * 2) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_argb1555 = 0;
+ }
+#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOARGB1555ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOARGB1555ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToARGB1555Row(src_argb, dst_argb1555, width);
+ src_argb += src_stride_argb;
+ dst_argb1555 += dst_stride_argb1555;
+ }
+ return 0;
+}
+
+// Convert ARGB To ARGB4444.
+LIBYUV_API
+int ARGBToARGB4444(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb4444,
+ int dst_stride_argb4444,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToARGB4444Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
+ int width) = ARGBToARGB4444Row_C;
+ if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_argb4444 == width * 2) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_argb4444 = 0;
+ }
+#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOARGB4444ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOARGB4444ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToARGB4444Row(src_argb, dst_argb4444, width);
+ src_argb += src_stride_argb;
+ dst_argb4444 += dst_stride_argb4444;
+ }
+ return 0;
+}
+
+// Convert ABGR To AR30.
+LIBYUV_API
+int ABGRToAR30(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height) {
+ int y;
+ void (*ABGRToAR30Row)(const uint8_t* src_abgr, uint8_t* dst_rgb, int width) =
+ ABGRToAR30Row_C;
+ if (!src_abgr || !dst_ar30 || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_abgr = src_abgr + (height - 1) * src_stride_abgr;
+ src_stride_abgr = -src_stride_abgr;
+ }
+ // Coalesce rows.
+ if (src_stride_abgr == width * 4 && dst_stride_ar30 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_abgr = dst_stride_ar30 = 0;
+ }
+#if defined(HAS_ABGRTOAR30ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ABGRToAR30Row = ABGRToAR30Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 4)) {
+ ABGRToAR30Row = ABGRToAR30Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOAR30ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ABGRToAR30Row = ABGRToAR30Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ABGRToAR30Row = ABGRToAR30Row_AVX2;
+ }
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ ABGRToAR30Row(src_abgr, dst_ar30, width);
+ src_abgr += src_stride_abgr;
+ dst_ar30 += dst_stride_ar30;
+ }
+ return 0;
+}
+
+// Convert ARGB To AR30.
+LIBYUV_API
+int ARGBToAR30(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_ar30,
+ int dst_stride_ar30,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToAR30Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) =
+ ARGBToAR30Row_C;
+ if (!src_argb || !dst_ar30 || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_ar30 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_ar30 = 0;
+ }
+#if defined(HAS_ARGBTOAR30ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToAR30Row = ARGBToAR30Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAR30ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToAR30Row = ARGBToAR30Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToAR30Row = ARGBToAR30Row_AVX2;
+ }
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ ARGBToAR30Row(src_argb, dst_ar30, width);
+ src_argb += src_stride_argb;
+ dst_ar30 += dst_stride_ar30;
+ }
+ return 0;
+}
+
+// Convert ARGB to J420. (JPeg full range I420).
+LIBYUV_API
+int ARGBToJ420(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVJRow_C;
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
+ ARGBToYJRow_C;
+ if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYJRow = ARGBToYJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYJRow = ARGBToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVJRow = ARGBToUVJRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
+ ARGBToYJRow(src_argb, dst_yj, width);
+ ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width);
+ src_argb += src_stride_argb * 2;
+ dst_yj += dst_stride_yj * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+ ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
+ ARGBToYJRow(src_argb, dst_yj, width);
+ }
+ return 0;
+}
+
+// Convert ARGB to J422. (JPeg full range I422).
+LIBYUV_API
+int ARGBToJ422(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVJRow_C;
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
+ ARGBToYJRow_C;
+ if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_yj == width &&
+ dst_stride_u * 2 == width && dst_stride_v * 2 == width) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0;
+ }
+#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYJRow = ARGBToYJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYJRow = ARGBToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVJRow = ARGBToUVJRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
+ ARGBToYJRow(src_argb, dst_yj, width);
+ src_argb += src_stride_argb;
+ dst_yj += dst_stride_yj;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ return 0;
+}
+
+// Convert ARGB to J400.
+LIBYUV_API
+int ARGBToJ400(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
+ ARGBToYJRow_C;
+ if (!src_argb || !dst_yj || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_yj == width) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_yj = 0;
+ }
+#if defined(HAS_ARGBTOYJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYJRow = ARGBToYJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYJRow = ARGBToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToYJRow(src_argb, dst_yj, width);
+ src_argb += src_stride_argb;
+ dst_yj += dst_stride_yj;
+ }
+ return 0;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_jpeg.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_jpeg.cc
new file mode 100644
index 0000000000..ae3cc18cd2
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_jpeg.cc
@@ -0,0 +1,332 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert.h"
+#include "libyuv/convert_argb.h"
+
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#ifdef HAVE_JPEG
+struct I420Buffers {
+ uint8_t* y;
+ int y_stride;
+ uint8_t* u;
+ int u_stride;
+ uint8_t* v;
+ int v_stride;
+ int w;
+ int h;
+};
+
+static void JpegCopyI420(void* opaque,
+ const uint8_t* const* data,
+ const int* strides,
+ int rows) {
+ I420Buffers* dest = (I420Buffers*)(opaque);
+ I420Copy(data[0], strides[0], data[1], strides[1], data[2], strides[2],
+ dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
+ dest->v_stride, dest->w, rows);
+ dest->y += rows * dest->y_stride;
+ dest->u += ((rows + 1) >> 1) * dest->u_stride;
+ dest->v += ((rows + 1) >> 1) * dest->v_stride;
+ dest->h -= rows;
+}
+
+static void JpegI422ToI420(void* opaque,
+ const uint8_t* const* data,
+ const int* strides,
+ int rows) {
+ I420Buffers* dest = (I420Buffers*)(opaque);
+ I422ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
+ dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
+ dest->v_stride, dest->w, rows);
+ dest->y += rows * dest->y_stride;
+ dest->u += ((rows + 1) >> 1) * dest->u_stride;
+ dest->v += ((rows + 1) >> 1) * dest->v_stride;
+ dest->h -= rows;
+}
+
+static void JpegI444ToI420(void* opaque,
+ const uint8_t* const* data,
+ const int* strides,
+ int rows) {
+ I420Buffers* dest = (I420Buffers*)(opaque);
+ I444ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
+ dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
+ dest->v_stride, dest->w, rows);
+ dest->y += rows * dest->y_stride;
+ dest->u += ((rows + 1) >> 1) * dest->u_stride;
+ dest->v += ((rows + 1) >> 1) * dest->v_stride;
+ dest->h -= rows;
+}
+
+static void JpegI400ToI420(void* opaque,
+ const uint8_t* const* data,
+ const int* strides,
+ int rows) {
+ I420Buffers* dest = (I420Buffers*)(opaque);
+ I400ToI420(data[0], strides[0], dest->y, dest->y_stride, dest->u,
+ dest->u_stride, dest->v, dest->v_stride, dest->w, rows);
+ dest->y += rows * dest->y_stride;
+ dest->u += ((rows + 1) >> 1) * dest->u_stride;
+ dest->v += ((rows + 1) >> 1) * dest->v_stride;
+ dest->h -= rows;
+}
+
+// Query size of MJPG in pixels.
+LIBYUV_API
+int MJPGSize(const uint8_t* sample,
+ size_t sample_size,
+ int* width,
+ int* height) {
+ MJpegDecoder mjpeg_decoder;
+ LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+ if (ret) {
+ *width = mjpeg_decoder.GetWidth();
+ *height = mjpeg_decoder.GetHeight();
+ }
+ mjpeg_decoder.UnloadFrame();
+ return ret ? 0 : -1; // -1 for runtime failure.
+}
+
+// MJPG (Motion JPeg) to I420
+// TODO(fbarchard): review src_width and src_height requirement. dst_width and
+// dst_height may be enough.
+LIBYUV_API
+int MJPGToI420(const uint8_t* sample,
+ size_t sample_size,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height) {
+ if (sample_size == kUnknownDataSize) {
+ // ERROR: MJPEG frame size unknown
+ return -1;
+ }
+
+ // TODO(fbarchard): Port MJpeg to C.
+ MJpegDecoder mjpeg_decoder;
+ LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+ if (ret && (mjpeg_decoder.GetWidth() != src_width ||
+ mjpeg_decoder.GetHeight() != src_height)) {
+ // ERROR: MJPEG frame has unexpected dimensions
+ mjpeg_decoder.UnloadFrame();
+ return 1; // runtime failure
+ }
+ if (ret) {
+ I420Buffers bufs = {dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, dst_width, dst_height};
+ // YUV420
+ if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
+ mjpeg_decoder.GetNumComponents() == 3 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 2 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+ mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+ ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dst_width,
+ dst_height);
+ // YUV422
+ } else if (mjpeg_decoder.GetColorSpace() ==
+ MJpegDecoder::kColorSpaceYCbCr &&
+ mjpeg_decoder.GetNumComponents() == 3 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+ mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+ ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dst_width,
+ dst_height);
+ // YUV444
+ } else if (mjpeg_decoder.GetColorSpace() ==
+ MJpegDecoder::kColorSpaceYCbCr &&
+ mjpeg_decoder.GetNumComponents() == 3 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+ ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dst_width,
+ dst_height);
+ // YUV400
+ } else if (mjpeg_decoder.GetColorSpace() ==
+ MJpegDecoder::kColorSpaceGrayscale &&
+ mjpeg_decoder.GetNumComponents() == 1 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 1) {
+ ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dst_width,
+ dst_height);
+ } else {
+ // TODO(fbarchard): Implement conversion for any other colorspace/sample
+ // factors that occur in practice.
+ // ERROR: Unable to convert MJPEG frame because format is not supported
+ mjpeg_decoder.UnloadFrame();
+ return 1;
+ }
+ }
+ return ret ? 0 : 1;
+}
+
+#ifdef HAVE_JPEG
+struct ARGBBuffers {
+ uint8_t* argb;
+ int argb_stride;
+ int w;
+ int h;
+};
+
+static void JpegI420ToARGB(void* opaque,
+ const uint8_t* const* data,
+ const int* strides,
+ int rows) {
+ ARGBBuffers* dest = (ARGBBuffers*)(opaque);
+ I420ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
+ dest->argb, dest->argb_stride, dest->w, rows);
+ dest->argb += rows * dest->argb_stride;
+ dest->h -= rows;
+}
+
+static void JpegI422ToARGB(void* opaque,
+ const uint8_t* const* data,
+ const int* strides,
+ int rows) {
+ ARGBBuffers* dest = (ARGBBuffers*)(opaque);
+ I422ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
+ dest->argb, dest->argb_stride, dest->w, rows);
+ dest->argb += rows * dest->argb_stride;
+ dest->h -= rows;
+}
+
+static void JpegI444ToARGB(void* opaque,
+ const uint8_t* const* data,
+ const int* strides,
+ int rows) {
+ ARGBBuffers* dest = (ARGBBuffers*)(opaque);
+ I444ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
+ dest->argb, dest->argb_stride, dest->w, rows);
+ dest->argb += rows * dest->argb_stride;
+ dest->h -= rows;
+}
+
+static void JpegI400ToARGB(void* opaque,
+ const uint8_t* const* data,
+ const int* strides,
+ int rows) {
+ ARGBBuffers* dest = (ARGBBuffers*)(opaque);
+ I400ToARGB(data[0], strides[0], dest->argb, dest->argb_stride, dest->w, rows);
+ dest->argb += rows * dest->argb_stride;
+ dest->h -= rows;
+}
+
+// MJPG (Motion JPeg) to ARGB
+// TODO(fbarchard): review src_width and src_height requirement. dst_width and
+// dst_height may be enough.
+LIBYUV_API
+int MJPGToARGB(const uint8_t* sample,
+ size_t sample_size,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height) {
+ if (sample_size == kUnknownDataSize) {
+ // ERROR: MJPEG frame size unknown
+ return -1;
+ }
+
+ // TODO(fbarchard): Port MJpeg to C.
+ MJpegDecoder mjpeg_decoder;
+ LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+ if (ret && (mjpeg_decoder.GetWidth() != src_width ||
+ mjpeg_decoder.GetHeight() != src_height)) {
+ // ERROR: MJPEG frame has unexpected dimensions
+ mjpeg_decoder.UnloadFrame();
+ return 1; // runtime failure
+ }
+ if (ret) {
+ ARGBBuffers bufs = {dst_argb, dst_stride_argb, dst_width, dst_height};
+ // YUV420
+ if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
+ mjpeg_decoder.GetNumComponents() == 3 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 2 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+ mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+ ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dst_width,
+ dst_height);
+ // YUV422
+ } else if (mjpeg_decoder.GetColorSpace() ==
+ MJpegDecoder::kColorSpaceYCbCr &&
+ mjpeg_decoder.GetNumComponents() == 3 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+ mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+ ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dst_width,
+ dst_height);
+ // YUV444
+ } else if (mjpeg_decoder.GetColorSpace() ==
+ MJpegDecoder::kColorSpaceYCbCr &&
+ mjpeg_decoder.GetNumComponents() == 3 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+ ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dst_width,
+ dst_height);
+ // YUV400
+ } else if (mjpeg_decoder.GetColorSpace() ==
+ MJpegDecoder::kColorSpaceGrayscale &&
+ mjpeg_decoder.GetNumComponents() == 1 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 1) {
+ ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dst_width,
+ dst_height);
+ } else {
+ // TODO(fbarchard): Implement conversion for any other colorspace/sample
+ // factors that occur in practice.
+ // ERROR: Unable to convert MJPEG frame because format is not supported
+ mjpeg_decoder.UnloadFrame();
+ return 1;
+ }
+ }
+ return ret ? 0 : 1;
+}
+#endif
+
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_to_argb.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_to_argb.cc
new file mode 100644
index 0000000000..67484522c0
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_to_argb.cc
@@ -0,0 +1,291 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert_argb.h"
+
+#include "libyuv/cpu_id.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "libyuv/rotate_argb.h"
+#include "libyuv/row.h"
+#include "libyuv/video_common.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Convert camera sample to ARGB with cropping, rotation and vertical flip.
+// src_width is used for source stride computation
+// src_height is used to compute location of planes, and indicate inversion
+// sample_size is measured in bytes and is the size of the frame.
+// With MJPEG it is the compressed size of the frame.
+
+// TODO(fbarchard): Add the following:
+// H010ToARGB
+// H420ToARGB
+// H422ToARGB
+// I010ToARGB
+// J400ToARGB
+// J422ToARGB
+// J444ToARGB
+
+LIBYUV_API
+int ConvertToARGB(const uint8_t* sample,
+ size_t sample_size,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int crop_x,
+ int crop_y,
+ int src_width,
+ int src_height,
+ int crop_width,
+ int crop_height,
+ enum RotationMode rotation,
+ uint32_t fourcc) {
+ uint32_t format = CanonicalFourCC(fourcc);
+ int aligned_src_width = (src_width + 1) & ~1;
+ const uint8_t* src;
+ const uint8_t* src_uv;
+ int abs_src_height = (src_height < 0) ? -src_height : src_height;
+ int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
+ int r = 0;
+
+ // One pass rotation is available for some formats. For the rest, convert
+ // to ARGB (with optional vertical flipping) into a temporary ARGB buffer,
+ // and then rotate the ARGB to the final destination buffer.
+ // For in-place conversion, if destination dst_argb is same as source sample,
+ // also enable temporary buffer.
+ LIBYUV_BOOL need_buf =
+ (rotation && format != FOURCC_ARGB) || dst_argb == sample;
+ uint8_t* dest_argb = dst_argb;
+ int dest_dst_stride_argb = dst_stride_argb;
+ uint8_t* rotate_buffer = NULL;
+ int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
+
+ if (dst_argb == NULL || sample == NULL || src_width <= 0 || crop_width <= 0 ||
+ src_height == 0 || crop_height == 0) {
+ return -1;
+ }
+ if (src_height < 0) {
+ inv_crop_height = -inv_crop_height;
+ }
+
+ if (need_buf) {
+ int argb_size = crop_width * 4 * abs_crop_height;
+ rotate_buffer = (uint8_t*)malloc(argb_size); /* NOLINT */
+ if (!rotate_buffer) {
+ return 1; // Out of memory runtime error.
+ }
+ dst_argb = rotate_buffer;
+ dst_stride_argb = crop_width * 4;
+ }
+
+ switch (format) {
+ // Single plane formats
+ case FOURCC_YUY2:
+ src = sample + (aligned_src_width * crop_y + crop_x) * 2;
+ r = YUY2ToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb,
+ crop_width, inv_crop_height);
+ break;
+ case FOURCC_UYVY:
+ src = sample + (aligned_src_width * crop_y + crop_x) * 2;
+ r = UYVYToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb,
+ crop_width, inv_crop_height);
+ break;
+ case FOURCC_24BG:
+ src = sample + (src_width * crop_y + crop_x) * 3;
+ r = RGB24ToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_RAW:
+ src = sample + (src_width * crop_y + crop_x) * 3;
+ r = RAWToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_ARGB:
+ if (!need_buf && !rotation) {
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = ARGBToARGB(src, src_width * 4, dst_argb, dst_stride_argb,
+ crop_width, inv_crop_height);
+ }
+ break;
+ case FOURCC_BGRA:
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = BGRAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_ABGR:
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = ABGRToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_RGBA:
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = RGBAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_AR30:
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = AR30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_AB30:
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = AB30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_RGBP:
+ src = sample + (src_width * crop_y + crop_x) * 2;
+ r = RGB565ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
+ crop_width, inv_crop_height);
+ break;
+ case FOURCC_RGBO:
+ src = sample + (src_width * crop_y + crop_x) * 2;
+ r = ARGB1555ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
+ crop_width, inv_crop_height);
+ break;
+ case FOURCC_R444:
+ src = sample + (src_width * crop_y + crop_x) * 2;
+ r = ARGB4444ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
+ crop_width, inv_crop_height);
+ break;
+ case FOURCC_I400:
+ src = sample + src_width * crop_y + crop_x;
+ r = I400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width,
+ inv_crop_height);
+ break;
+
+ // Biplanar formats
+ case FOURCC_NV12:
+ src = sample + (src_width * crop_y + crop_x);
+ src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x;
+ r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb,
+ dst_stride_argb, crop_width, inv_crop_height);
+ break;
+ case FOURCC_NV21:
+ src = sample + (src_width * crop_y + crop_x);
+ src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x;
+ // Call NV12 but with u and v parameters swapped.
+ r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb,
+ dst_stride_argb, crop_width, inv_crop_height);
+ break;
+ case FOURCC_M420:
+ src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
+ r = M420ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width,
+ inv_crop_height);
+ break;
+
+ // Triplanar formats
+ case FOURCC_I420:
+ case FOURCC_YV12: {
+ const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
+ const uint8_t* src_u;
+ const uint8_t* src_v;
+ int halfwidth = (src_width + 1) / 2;
+ int halfheight = (abs_src_height + 1) / 2;
+ if (format == FOURCC_YV12) {
+ src_v = sample + src_width * abs_src_height +
+ (halfwidth * crop_y + crop_x) / 2;
+ src_u = sample + src_width * abs_src_height +
+ halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
+ } else {
+ src_u = sample + src_width * abs_src_height +
+ (halfwidth * crop_y + crop_x) / 2;
+ src_v = sample + src_width * abs_src_height +
+ halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
+ }
+ r = I420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
+ dst_argb, dst_stride_argb, crop_width, inv_crop_height);
+ break;
+ }
+
+ case FOURCC_J420: {
+ const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
+ const uint8_t* src_u;
+ const uint8_t* src_v;
+ int halfwidth = (src_width + 1) / 2;
+ int halfheight = (abs_src_height + 1) / 2;
+ src_u = sample + src_width * abs_src_height +
+ (halfwidth * crop_y + crop_x) / 2;
+ src_v = sample + src_width * abs_src_height +
+ halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
+ r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
+ dst_argb, dst_stride_argb, crop_width, inv_crop_height);
+ break;
+ }
+
+ case FOURCC_I422:
+ case FOURCC_YV16: {
+ const uint8_t* src_y = sample + src_width * crop_y + crop_x;
+ const uint8_t* src_u;
+ const uint8_t* src_v;
+ int halfwidth = (src_width + 1) / 2;
+ if (format == FOURCC_YV16) {
+ src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
+ crop_x / 2;
+ src_u = sample + src_width * abs_src_height +
+ halfwidth * (abs_src_height + crop_y) + crop_x / 2;
+ } else {
+ src_u = sample + src_width * abs_src_height + halfwidth * crop_y +
+ crop_x / 2;
+ src_v = sample + src_width * abs_src_height +
+ halfwidth * (abs_src_height + crop_y) + crop_x / 2;
+ }
+ r = I422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
+ dst_argb, dst_stride_argb, crop_width, inv_crop_height);
+ break;
+ }
+ case FOURCC_I444:
+ case FOURCC_YV24: {
+ const uint8_t* src_y = sample + src_width * crop_y + crop_x;
+ const uint8_t* src_u;
+ const uint8_t* src_v;
+ if (format == FOURCC_YV24) {
+ src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
+ src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
+ } else {
+ src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
+ src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
+ }
+ r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
+ dst_argb, dst_stride_argb, crop_width, inv_crop_height);
+ break;
+ }
+#ifdef HAVE_JPEG
+ case FOURCC_MJPG:
+ r = MJPGToARGB(sample, sample_size, dst_argb, dst_stride_argb, src_width,
+ abs_src_height, crop_width, inv_crop_height);
+ break;
+#endif
+ default:
+ r = -1; // unknown fourcc - return failure code.
+ }
+
+ if (need_buf) {
+ if (!r) {
+ r = ARGBRotate(dst_argb, dst_stride_argb, dest_argb, dest_dst_stride_argb,
+ crop_width, abs_crop_height, rotation);
+ }
+ free(rotate_buffer);
+ } else if (rotation) {
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = ARGBRotate(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
+ inv_crop_height, rotation);
+ }
+
+ return r;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_to_i420.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_to_i420.cc
new file mode 100644
index 0000000000..df08309f9b
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_to_i420.cc
@@ -0,0 +1,277 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+
+#include "libyuv/convert.h"
+
+#include "libyuv/video_common.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Convert camera sample to I420 with cropping, rotation and vertical flip.
+// src_width is used for source stride computation
+// src_height is used to compute location of planes, and indicate inversion
+// sample_size is measured in bytes and is the size of the frame.
+// With MJPEG it is the compressed size of the frame.
+LIBYUV_API
+int ConvertToI420(const uint8_t* sample,
+ size_t sample_size,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int crop_x,
+ int crop_y,
+ int src_width,
+ int src_height,
+ int crop_width,
+ int crop_height,
+ enum RotationMode rotation,
+ uint32_t fourcc) {
+ uint32_t format = CanonicalFourCC(fourcc);
+ int aligned_src_width = (src_width + 1) & ~1;
+ const uint8_t* src;
+ const uint8_t* src_uv;
+ const int abs_src_height = (src_height < 0) ? -src_height : src_height;
+ // TODO(nisse): Why allow crop_height < 0?
+ const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
+ int r = 0;
+ LIBYUV_BOOL need_buf =
+ (rotation && format != FOURCC_I420 && format != FOURCC_NV12 &&
+ format != FOURCC_NV21 && format != FOURCC_YV12) ||
+ dst_y == sample;
+ uint8_t* tmp_y = dst_y;
+ uint8_t* tmp_u = dst_u;
+ uint8_t* tmp_v = dst_v;
+ int tmp_y_stride = dst_stride_y;
+ int tmp_u_stride = dst_stride_u;
+ int tmp_v_stride = dst_stride_v;
+ uint8_t* rotate_buffer = NULL;
+ const int inv_crop_height =
+ (src_height < 0) ? -abs_crop_height : abs_crop_height;
+
+ if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
+ crop_width <= 0 || src_height == 0 || crop_height == 0) {
+ return -1;
+ }
+
+ // One pass rotation is available for some formats. For the rest, convert
+ // to I420 (with optional vertical flipping) into a temporary I420 buffer,
+ // and then rotate the I420 to the final destination buffer.
+ // For in-place conversion, if destination dst_y is same as source sample,
+ // also enable temporary buffer.
+ if (need_buf) {
+ int y_size = crop_width * abs_crop_height;
+ int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
+ rotate_buffer = (uint8_t*)malloc(y_size + uv_size * 2); /* NOLINT */
+ if (!rotate_buffer) {
+ return 1; // Out of memory runtime error.
+ }
+ dst_y = rotate_buffer;
+ dst_u = dst_y + y_size;
+ dst_v = dst_u + uv_size;
+ dst_stride_y = crop_width;
+ dst_stride_u = dst_stride_v = ((crop_width + 1) / 2);
+ }
+
+ switch (format) {
+ // Single plane formats
+ case FOURCC_YUY2:
+ src = sample + (aligned_src_width * crop_y + crop_x) * 2;
+ r = YUY2ToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_UYVY:
+ src = sample + (aligned_src_width * crop_y + crop_x) * 2;
+ r = UYVYToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_RGBP:
+ src = sample + (src_width * crop_y + crop_x) * 2;
+ r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_RGBO:
+ src = sample + (src_width * crop_y + crop_x) * 2;
+ r = ARGB1555ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_R444:
+ src = sample + (src_width * crop_y + crop_x) * 2;
+ r = ARGB4444ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_24BG:
+ src = sample + (src_width * crop_y + crop_x) * 3;
+ r = RGB24ToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_RAW:
+ src = sample + (src_width * crop_y + crop_x) * 3;
+ r = RAWToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_ARGB:
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_BGRA:
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = BGRAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_ABGR:
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ case FOURCC_RGBA:
+ src = sample + (src_width * crop_y + crop_x) * 4;
+ r = RGBAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, crop_width,
+ inv_crop_height);
+ break;
+ // TODO(fbarchard): Add AR30 and AB30
+ case FOURCC_I400:
+ src = sample + src_width * crop_y + crop_x;
+ r = I400ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, crop_width, inv_crop_height);
+ break;
+ // Biplanar formats
+ case FOURCC_NV12:
+ src = sample + (src_width * crop_y + crop_x);
+ src_uv = sample + (src_width * abs_src_height) +
+ ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
+ r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y,
+ dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, crop_width, inv_crop_height, rotation);
+ break;
+ case FOURCC_NV21:
+ src = sample + (src_width * crop_y + crop_x);
+ src_uv = sample + (src_width * abs_src_height) +
+ ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
+ // Call NV12 but with dst_u and dst_v parameters swapped.
+ r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y,
+ dst_stride_y, dst_v, dst_stride_v, dst_u,
+ dst_stride_u, crop_width, inv_crop_height, rotation);
+ break;
+ case FOURCC_M420:
+ src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
+ r = M420ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, crop_width, inv_crop_height);
+ break;
+ // Triplanar formats
+ case FOURCC_I420:
+ case FOURCC_YV12: {
+ const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
+ const uint8_t* src_u;
+ const uint8_t* src_v;
+ int halfwidth = (src_width + 1) / 2;
+ int halfheight = (abs_src_height + 1) / 2;
+ if (format == FOURCC_YV12) {
+ src_v = sample + src_width * abs_src_height +
+ (halfwidth * crop_y + crop_x) / 2;
+ src_u = sample + src_width * abs_src_height +
+ halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
+ } else {
+ src_u = sample + src_width * abs_src_height +
+ (halfwidth * crop_y + crop_x) / 2;
+ src_v = sample + src_width * abs_src_height +
+ halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
+ }
+ r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
+ dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, crop_width, inv_crop_height, rotation);
+ break;
+ }
+ case FOURCC_I422:
+ case FOURCC_YV16: {
+ const uint8_t* src_y = sample + src_width * crop_y + crop_x;
+ const uint8_t* src_u;
+ const uint8_t* src_v;
+ int halfwidth = (src_width + 1) / 2;
+ if (format == FOURCC_YV16) {
+ src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
+ crop_x / 2;
+ src_u = sample + src_width * abs_src_height +
+ halfwidth * (abs_src_height + crop_y) + crop_x / 2;
+ } else {
+ src_u = sample + src_width * abs_src_height + halfwidth * crop_y +
+ crop_x / 2;
+ src_v = sample + src_width * abs_src_height +
+ halfwidth * (abs_src_height + crop_y) + crop_x / 2;
+ }
+ r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
+ dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, crop_width, inv_crop_height);
+ break;
+ }
+ case FOURCC_I444:
+ case FOURCC_YV24: {
+ const uint8_t* src_y = sample + src_width * crop_y + crop_x;
+ const uint8_t* src_u;
+ const uint8_t* src_v;
+ if (format == FOURCC_YV24) {
+ src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
+ src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
+ } else {
+ src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
+ src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
+ }
+ r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width,
+ dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, crop_width, inv_crop_height);
+ break;
+ }
+#ifdef HAVE_JPEG
+ case FOURCC_MJPG:
+ r = MJPGToI420(sample, sample_size, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, src_width,
+ abs_src_height, crop_width, inv_crop_height);
+ break;
+#endif
+ default:
+ r = -1; // unknown fourcc - return failure code.
+ }
+
+ if (need_buf) {
+ if (!r) {
+ r = I420Rotate(dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, tmp_y, tmp_y_stride, tmp_u, tmp_u_stride,
+ tmp_v, tmp_v_stride, crop_width, abs_crop_height,
+ rotation);
+ }
+ free(rotate_buffer);
+ }
+
+ return r;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/cpu_id.cc b/media/libvpx/libvpx/third_party/libyuv/source/cpu_id.cc
new file mode 100644
index 0000000000..31e24b6739
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/cpu_id.cc
@@ -0,0 +1,276 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/cpu_id.h"
+
+#if defined(_MSC_VER)
+#include <intrin.h> // For __cpuidex()
+#endif
+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
+ !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
+ defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
+#include <immintrin.h> // For _xgetbv()
+#endif
+
+// For ArmCpuCaps() but unittested on all platforms
+#include <stdio.h>
+#include <string.h>
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// For functions that use the stack and have runtime checks for overflow,
+// use SAFEBUFFERS to avoid additional check.
+#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && \
+ !defined(__clang__)
+#define SAFEBUFFERS __declspec(safebuffers)
+#else
+#define SAFEBUFFERS
+#endif
+
+// cpu_info_ variable for SIMD instruction sets detected.
+LIBYUV_API int cpu_info_ = 0;
+
+// TODO(fbarchard): Consider using int for cpuid so casting is not needed.
+// Low level cpuid for X86.
+#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
+ defined(__x86_64__)) && \
+ !defined(__pnacl__) && !defined(__CLR_VER)
+LIBYUV_API
+void CpuId(int info_eax, int info_ecx, int* cpu_info) {
+#if defined(_MSC_VER)
+// Visual C version uses intrinsic or inline x86 assembly.
+#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
+ __cpuidex(cpu_info, info_eax, info_ecx);
+#elif defined(_M_IX86)
+ __asm {
+ mov eax, info_eax
+ mov ecx, info_ecx
+ mov edi, cpu_info
+ cpuid
+ mov [edi], eax
+ mov [edi + 4], ebx
+ mov [edi + 8], ecx
+ mov [edi + 12], edx
+ }
+#else // Visual C but not x86
+ if (info_ecx == 0) {
+ __cpuid(cpu_info, info_eax);
+ } else {
+ cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0u;
+ }
+#endif
+// GCC version uses inline x86 assembly.
+#else // defined(_MSC_VER)
+ int info_ebx, info_edx;
+ asm volatile(
+#if defined(__i386__) && defined(__PIC__)
+ // Preserve ebx for fpic 32 bit.
+ "mov %%ebx, %%edi \n"
+ "cpuid \n"
+ "xchg %%edi, %%ebx \n"
+ : "=D"(info_ebx),
+#else
+ "cpuid \n"
+ : "=b"(info_ebx),
+#endif // defined( __i386__) && defined(__PIC__)
+ "+a"(info_eax), "+c"(info_ecx), "=d"(info_edx));
+ cpu_info[0] = info_eax;
+ cpu_info[1] = info_ebx;
+ cpu_info[2] = info_ecx;
+ cpu_info[3] = info_edx;
+#endif // defined(_MSC_VER)
+}
+#else // (defined(_M_IX86) || defined(_M_X64) ...
+LIBYUV_API
+void CpuId(int eax, int ecx, int* cpu_info) {
+ (void)eax;
+ (void)ecx;
+ cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
+}
+#endif
+
+// For VS2010 and earlier emit can be used:
+// _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
+// __asm {
+// xor ecx, ecx // xcr 0
+// xgetbv
+// mov xcr0, eax
+// }
+// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
+// https://code.google.com/p/libyuv/issues/detail?id=529
+#if defined(_M_IX86) && (_MSC_VER < 1900)
+#pragma optimize("g", off)
+#endif
+#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
+ defined(__x86_64__)) && \
+ !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
+// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
+int GetXCR0() {
+ int xcr0 = 0;
+#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
+ xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. NOLINT
+#elif defined(__i386__) || defined(__x86_64__)
+ asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");
+#endif // defined(__i386__) || defined(__x86_64__)
+ return xcr0;
+}
+#else
+// xgetbv unavailable to query for OSSave support. Return 0.
+#define GetXCR0() 0
+#endif // defined(_M_IX86) || defined(_M_X64) ..
+// Return optimization to previous setting.
+#if defined(_M_IX86) && (_MSC_VER < 1900)
+#pragma optimize("g", on)
+#endif
+
+// based on libvpx arm_cpudetect.c
+// For Arm, but public to allow testing on any CPU
+LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
+ char cpuinfo_line[512];
+ FILE* f = fopen(cpuinfo_name, "r");
+ if (!f) {
+ // Assume Neon if /proc/cpuinfo is unavailable.
+ // This will occur for Chrome sandbox for Pepper or Render process.
+ return kCpuHasNEON;
+ }
+ while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
+ if (memcmp(cpuinfo_line, "Features", 8) == 0) {
+ char* p = strstr(cpuinfo_line, " neon");
+ if (p && (p[5] == ' ' || p[5] == '\n')) {
+ fclose(f);
+ return kCpuHasNEON;
+ }
+ // aarch64 uses asimd for Neon.
+ p = strstr(cpuinfo_line, " asimd");
+ if (p) {
+ fclose(f);
+ return kCpuHasNEON;
+ }
+ }
+ }
+ fclose(f);
+ return 0;
+}
+
+// TODO(fbarchard): Consider read_msa_ir().
+// TODO(fbarchard): Add unittest.
+LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
+ const char ase[]) {
+ char cpuinfo_line[512];
+ FILE* f = fopen(cpuinfo_name, "r");
+ if (!f) {
+ // ase enabled if /proc/cpuinfo is unavailable.
+ if (strcmp(ase, " msa") == 0) {
+ return kCpuHasMSA;
+ }
+ return 0;
+ }
+ while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
+ if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
+ char* p = strstr(cpuinfo_line, ase);
+ if (p) {
+ fclose(f);
+ if (strcmp(ase, " msa") == 0) {
+ return kCpuHasMSA;
+ }
+ return 0;
+ }
+ }
+ }
+ fclose(f);
+ return 0;
+}
+
+static SAFEBUFFERS int GetCpuFlags(void) {
+ int cpu_info = 0;
+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
+ (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
+ defined(_M_IX86))
+ int cpu_info0[4] = {0, 0, 0, 0};
+ int cpu_info1[4] = {0, 0, 0, 0};
+ int cpu_info7[4] = {0, 0, 0, 0};
+ CpuId(0, 0, cpu_info0);
+ CpuId(1, 0, cpu_info1);
+ if (cpu_info0[0] >= 7) {
+ CpuId(7, 0, cpu_info7);
+ }
+ cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
+ ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
+ ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
+ ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
+ ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0);
+
+ // AVX requires OS saves YMM registers.
+ if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
+ ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
+ cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
+ ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
+ ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
+
+ // Detect AVX512bw
+ if ((GetXCR0() & 0xe0) == 0xe0) {
+ cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0;
+ cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0;
+ cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0;
+ cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
+ cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
+ cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0;
+ cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0;
+ }
+ }
+#endif
+#if defined(__mips__) && defined(__linux__)
+#if defined(__mips_msa)
+ cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");
+#endif
+ cpu_info |= kCpuHasMIPS;
+#endif
+#if defined(__arm__) || defined(__aarch64__)
+// gcc -mfpu=neon defines __ARM_NEON__
+// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
+// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
+#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
+ cpu_info = kCpuHasNEON;
+// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
+// flag in it.
+// So for aarch64, neon enabling is hard coded here.
+#endif
+#if defined(__aarch64__)
+ cpu_info = kCpuHasNEON;
+#else
+ // Linux arm parse text file for neon detect.
+ cpu_info = ArmCpuCaps("/proc/cpuinfo");
+#endif
+ cpu_info |= kCpuHasARM;
+#endif // __arm__
+ cpu_info |= kCpuInitialized;
+ return cpu_info;
+}
+
+// Note that use of this function is not thread safe.
+LIBYUV_API
+int MaskCpuFlags(int enable_flags) {
+ int cpu_info = GetCpuFlags() & enable_flags;
+ SetCpuFlags(cpu_info);
+ return cpu_info;
+}
+
+LIBYUV_API
+int InitCpuFlags(void) {
+ return MaskCpuFlags(-1);
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_decoder.cc b/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_decoder.cc
new file mode 100644
index 0000000000..eaf2530130
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_decoder.cc
@@ -0,0 +1,573 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/mjpeg_decoder.h"
+
+#ifdef HAVE_JPEG
+#include <assert.h>
+
+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
+ !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
+// Must be included before jpeglib.
+#include <setjmp.h>
+#define HAVE_SETJMP
+
+#if defined(_MSC_VER)
+// disable warning 4324: structure was padded due to __declspec(align())
+#pragma warning(disable : 4324)
+#endif
+
+#endif
+struct FILE; // For jpeglib.h.
+
+// C++ build requires extern C for jpeg internals.
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <jpeglib.h>
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#include "libyuv/planar_functions.h" // For CopyPlane().
+
+namespace libyuv {
+
+#ifdef HAVE_SETJMP
+struct SetJmpErrorMgr {
+ jpeg_error_mgr base; // Must be at the top
+ jmp_buf setjmp_buffer;
+};
+#endif
+
+const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
+const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
+const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
+const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
+const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
+const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
+
+// Methods that are passed to jpeglib.
+boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
+void init_source(jpeg_decompress_struct* cinfo);
+void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes); // NOLINT
+void term_source(jpeg_decompress_struct* cinfo);
+void ErrorHandler(jpeg_common_struct* cinfo);
+void OutputHandler(jpeg_common_struct* cinfo);
+
+MJpegDecoder::MJpegDecoder()
+ : has_scanline_padding_(LIBYUV_FALSE),
+ num_outbufs_(0),
+ scanlines_(NULL),
+ scanlines_sizes_(NULL),
+ databuf_(NULL),
+ databuf_strides_(NULL) {
+ decompress_struct_ = new jpeg_decompress_struct;
+ source_mgr_ = new jpeg_source_mgr;
+#ifdef HAVE_SETJMP
+ error_mgr_ = new SetJmpErrorMgr;
+ decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
+ // Override standard exit()-based error handler.
+ error_mgr_->base.error_exit = &ErrorHandler;
+ error_mgr_->base.output_message = &OutputHandler;
+#endif
+ decompress_struct_->client_data = NULL;
+ source_mgr_->init_source = &init_source;
+ source_mgr_->fill_input_buffer = &fill_input_buffer;
+ source_mgr_->skip_input_data = &skip_input_data;
+ source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
+ source_mgr_->term_source = &term_source;
+ jpeg_create_decompress(decompress_struct_);
+ decompress_struct_->src = source_mgr_;
+ buf_vec_.buffers = &buf_;
+ buf_vec_.len = 1;
+}
+
+MJpegDecoder::~MJpegDecoder() {
+ jpeg_destroy_decompress(decompress_struct_);
+ delete decompress_struct_;
+ delete source_mgr_;
+#ifdef HAVE_SETJMP
+ delete error_mgr_;
+#endif
+ DestroyOutputBuffers();
+}
+
+LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
+ if (!ValidateJpeg(src, src_len)) {
+ return LIBYUV_FALSE;
+ }
+
+ buf_.data = src;
+ buf_.len = static_cast<int>(src_len);
+ buf_vec_.pos = 0;
+ decompress_struct_->client_data = &buf_vec_;
+#ifdef HAVE_SETJMP
+ if (setjmp(error_mgr_->setjmp_buffer)) {
+ // We called jpeg_read_header, it experienced an error, and we called
+ // longjmp() and rewound the stack to here. Return error.
+ return LIBYUV_FALSE;
+ }
+#endif
+ if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
+ // ERROR: Bad MJPEG header
+ return LIBYUV_FALSE;
+ }
+ AllocOutputBuffers(GetNumComponents());
+ for (int i = 0; i < num_outbufs_; ++i) {
+ int scanlines_size = GetComponentScanlinesPerImcuRow(i);
+ if (scanlines_sizes_[i] != scanlines_size) {
+ if (scanlines_[i]) {
+ delete scanlines_[i];
+ }
+ scanlines_[i] = new uint8_t*[scanlines_size];
+ scanlines_sizes_[i] = scanlines_size;
+ }
+
+ // We allocate padding for the final scanline to pad it up to DCTSIZE bytes
+ // to avoid memory errors, since jpeglib only reads full MCUs blocks. For
+ // the preceding scanlines, the padding is not needed/wanted because the
+ // following addresses will already be valid (they are the initial bytes of
+ // the next scanline) and will be overwritten when jpeglib writes out that
+ // next scanline.
+ int databuf_stride = GetComponentStride(i);
+ int databuf_size = scanlines_size * databuf_stride;
+ if (databuf_strides_[i] != databuf_stride) {
+ if (databuf_[i]) {
+ delete databuf_[i];
+ }
+ databuf_[i] = new uint8_t[databuf_size];
+ databuf_strides_[i] = databuf_stride;
+ }
+
+ if (GetComponentStride(i) != GetComponentWidth(i)) {
+ has_scanline_padding_ = LIBYUV_TRUE;
+ }
+ }
+ return LIBYUV_TRUE;
+}
+
+static int DivideAndRoundUp(int numerator, int denominator) {
+ return (numerator + denominator - 1) / denominator;
+}
+
+static int DivideAndRoundDown(int numerator, int denominator) {
+ return numerator / denominator;
+}
+
+// Returns width of the last loaded frame.
+int MJpegDecoder::GetWidth() {
+ return decompress_struct_->image_width;
+}
+
+// Returns height of the last loaded frame.
+int MJpegDecoder::GetHeight() {
+ return decompress_struct_->image_height;
+}
+
+// Returns format of the last loaded frame. The return value is one of the
+// kColorSpace* constants.
+int MJpegDecoder::GetColorSpace() {
+ return decompress_struct_->jpeg_color_space;
+}
+
+// Number of color components in the color space.
+int MJpegDecoder::GetNumComponents() {
+ return decompress_struct_->num_components;
+}
+
+// Sample factors of the n-th component.
+int MJpegDecoder::GetHorizSampFactor(int component) {
+ return decompress_struct_->comp_info[component].h_samp_factor;
+}
+
+int MJpegDecoder::GetVertSampFactor(int component) {
+ return decompress_struct_->comp_info[component].v_samp_factor;
+}
+
+int MJpegDecoder::GetHorizSubSampFactor(int component) {
+ return decompress_struct_->max_h_samp_factor / GetHorizSampFactor(component);
+}
+
+int MJpegDecoder::GetVertSubSampFactor(int component) {
+ return decompress_struct_->max_v_samp_factor / GetVertSampFactor(component);
+}
+
+int MJpegDecoder::GetImageScanlinesPerImcuRow() {
+ return decompress_struct_->max_v_samp_factor * DCTSIZE;
+}
+
+int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
+ int vs = GetVertSubSampFactor(component);
+ return DivideAndRoundUp(GetImageScanlinesPerImcuRow(), vs);
+}
+
+int MJpegDecoder::GetComponentWidth(int component) {
+ int hs = GetHorizSubSampFactor(component);
+ return DivideAndRoundUp(GetWidth(), hs);
+}
+
+int MJpegDecoder::GetComponentHeight(int component) {
+ int vs = GetVertSubSampFactor(component);
+ return DivideAndRoundUp(GetHeight(), vs);
+}
+
+// Get width in bytes padded out to a multiple of DCTSIZE
+int MJpegDecoder::GetComponentStride(int component) {
+ return (GetComponentWidth(component) + DCTSIZE - 1) & ~(DCTSIZE - 1);
+}
+
+int MJpegDecoder::GetComponentSize(int component) {
+ return GetComponentWidth(component) * GetComponentHeight(component);
+}
+
+LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
+#ifdef HAVE_SETJMP
+ if (setjmp(error_mgr_->setjmp_buffer)) {
+ // We called jpeg_abort_decompress, it experienced an error, and we called
+ // longjmp() and rewound the stack to here. Return error.
+ return LIBYUV_FALSE;
+ }
+#endif
+ jpeg_abort_decompress(decompress_struct_);
+ return LIBYUV_TRUE;
+}
+
+// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
+LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(uint8_t** planes,
+ int dst_width,
+ int dst_height) {
+ if (dst_width != GetWidth() || dst_height > GetHeight()) {
+ // ERROR: Bad dimensions
+ return LIBYUV_FALSE;
+ }
+#ifdef HAVE_SETJMP
+ if (setjmp(error_mgr_->setjmp_buffer)) {
+ // We called into jpeglib, it experienced an error sometime during this
+ // function call, and we called longjmp() and rewound the stack to here.
+ // Return error.
+ return LIBYUV_FALSE;
+ }
+#endif
+ if (!StartDecode()) {
+ return LIBYUV_FALSE;
+ }
+ SetScanlinePointers(databuf_);
+ int lines_left = dst_height;
+ // Compute amount of lines to skip to implement vertical crop.
+ // TODO(fbarchard): Ensure skip is a multiple of maximum component
+ // subsample. ie 2
+ int skip = (GetHeight() - dst_height) / 2;
+ if (skip > 0) {
+ // There is no API to skip lines in the output data, so we read them
+ // into the temp buffer.
+ while (skip >= GetImageScanlinesPerImcuRow()) {
+ if (!DecodeImcuRow()) {
+ FinishDecode();
+ return LIBYUV_FALSE;
+ }
+ skip -= GetImageScanlinesPerImcuRow();
+ }
+ if (skip > 0) {
+ // Have a partial iMCU row left over to skip. Must read it and then
+ // copy the parts we want into the destination.
+ if (!DecodeImcuRow()) {
+ FinishDecode();
+ return LIBYUV_FALSE;
+ }
+ for (int i = 0; i < num_outbufs_; ++i) {
+ // TODO(fbarchard): Compute skip to avoid this
+ assert(skip % GetVertSubSampFactor(i) == 0);
+ int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
+ int scanlines_to_copy =
+ GetComponentScanlinesPerImcuRow(i) - rows_to_skip;
+ int data_to_skip = rows_to_skip * GetComponentStride(i);
+ CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i), planes[i],
+ GetComponentWidth(i), GetComponentWidth(i),
+ scanlines_to_copy);
+ planes[i] += scanlines_to_copy * GetComponentWidth(i);
+ }
+ lines_left -= (GetImageScanlinesPerImcuRow() - skip);
+ }
+ }
+
+ // Read full MCUs but cropped horizontally
+ for (; lines_left > GetImageScanlinesPerImcuRow();
+ lines_left -= GetImageScanlinesPerImcuRow()) {
+ if (!DecodeImcuRow()) {
+ FinishDecode();
+ return LIBYUV_FALSE;
+ }
+ for (int i = 0; i < num_outbufs_; ++i) {
+ int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
+ CopyPlane(databuf_[i], GetComponentStride(i), planes[i],
+ GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy);
+ planes[i] += scanlines_to_copy * GetComponentWidth(i);
+ }
+ }
+
+ if (lines_left > 0) {
+ // Have a partial iMCU row left over to decode.
+ if (!DecodeImcuRow()) {
+ FinishDecode();
+ return LIBYUV_FALSE;
+ }
+ for (int i = 0; i < num_outbufs_; ++i) {
+ int scanlines_to_copy =
+ DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
+ CopyPlane(databuf_[i], GetComponentStride(i), planes[i],
+ GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy);
+ planes[i] += scanlines_to_copy * GetComponentWidth(i);
+ }
+ }
+ return FinishDecode();
+}
+
+LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn,
+ void* opaque,
+ int dst_width,
+ int dst_height) {
+ if (dst_width != GetWidth() || dst_height > GetHeight()) {
+ // ERROR: Bad dimensions
+ return LIBYUV_FALSE;
+ }
+#ifdef HAVE_SETJMP
+ if (setjmp(error_mgr_->setjmp_buffer)) {
+ // We called into jpeglib, it experienced an error sometime during this
+ // function call, and we called longjmp() and rewound the stack to here.
+ // Return error.
+ return LIBYUV_FALSE;
+ }
+#endif
+ if (!StartDecode()) {
+ return LIBYUV_FALSE;
+ }
+ SetScanlinePointers(databuf_);
+ int lines_left = dst_height;
+ // TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
+ int skip = (GetHeight() - dst_height) / 2;
+ if (skip > 0) {
+ while (skip >= GetImageScanlinesPerImcuRow()) {
+ if (!DecodeImcuRow()) {
+ FinishDecode();
+ return LIBYUV_FALSE;
+ }
+ skip -= GetImageScanlinesPerImcuRow();
+ }
+ if (skip > 0) {
+ // Have a partial iMCU row left over to skip.
+ if (!DecodeImcuRow()) {
+ FinishDecode();
+ return LIBYUV_FALSE;
+ }
+ for (int i = 0; i < num_outbufs_; ++i) {
+ // TODO(fbarchard): Compute skip to avoid this
+ assert(skip % GetVertSubSampFactor(i) == 0);
+ int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
+ int data_to_skip = rows_to_skip * GetComponentStride(i);
+ // Change our own data buffer pointers so we can pass them to the
+ // callback.
+ databuf_[i] += data_to_skip;
+ }
+ int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
+ (*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
+ // Now change them back.
+ for (int i = 0; i < num_outbufs_; ++i) {
+ int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
+ int data_to_skip = rows_to_skip * GetComponentStride(i);
+ databuf_[i] -= data_to_skip;
+ }
+ lines_left -= scanlines_to_copy;
+ }
+ }
+ // Read full MCUs until we get to the crop point.
+ for (; lines_left >= GetImageScanlinesPerImcuRow();
+ lines_left -= GetImageScanlinesPerImcuRow()) {
+ if (!DecodeImcuRow()) {
+ FinishDecode();
+ return LIBYUV_FALSE;
+ }
+ (*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
+ }
+ if (lines_left > 0) {
+ // Have a partial iMCU row left over to decode.
+ if (!DecodeImcuRow()) {
+ FinishDecode();
+ return LIBYUV_FALSE;
+ }
+ (*fn)(opaque, databuf_, databuf_strides_, lines_left);
+ }
+ return FinishDecode();
+}
+
+void init_source(j_decompress_ptr cinfo) {
+ fill_input_buffer(cinfo);
+}
+
+boolean fill_input_buffer(j_decompress_ptr cinfo) {
+ BufferVector* buf_vec = reinterpret_cast<BufferVector*>(cinfo->client_data);
+ if (buf_vec->pos >= buf_vec->len) {
+ assert(0 && "No more data");
+ // ERROR: No more data
+ return FALSE;
+ }
+ cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data;
+ cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len;
+ ++buf_vec->pos;
+ return TRUE;
+}
+
+void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
+ cinfo->src->next_input_byte += num_bytes;
+}
+
+void term_source(j_decompress_ptr cinfo) {
+ (void)cinfo; // Nothing to do.
+}
+
+#ifdef HAVE_SETJMP
+void ErrorHandler(j_common_ptr cinfo) {
+// This is called when a jpeglib command experiences an error. Unfortunately
+// jpeglib's error handling model is not very flexible, because it expects the
+// error handler to not return--i.e., it wants the program to terminate. To
+// recover from errors we use setjmp() as shown in their example. setjmp() is
+// C's implementation for the "call with current continuation" functionality
+// seen in some functional programming languages.
+// A formatted message can be output, but is unsafe for release.
+#ifdef DEBUG
+ char buf[JMSG_LENGTH_MAX];
+ (*cinfo->err->format_message)(cinfo, buf);
+// ERROR: Error in jpeglib: buf
+#endif
+
+ SetJmpErrorMgr* mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err);
+ // This rewinds the call stack to the point of the corresponding setjmp()
+ // and causes it to return (for a second time) with value 1.
+ longjmp(mgr->setjmp_buffer, 1);
+}
+
+// Suppress fprintf warnings.
+void OutputHandler(j_common_ptr cinfo) {
+ (void)cinfo;
+}
+
+#endif // HAVE_SETJMP
+
+void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
+ if (num_outbufs != num_outbufs_) {
+ // We could perhaps optimize this case to resize the output buffers without
+ // necessarily having to delete and recreate each one, but it's not worth
+ // it.
+ DestroyOutputBuffers();
+
+ scanlines_ = new uint8_t**[num_outbufs];
+ scanlines_sizes_ = new int[num_outbufs];
+ databuf_ = new uint8_t*[num_outbufs];
+ databuf_strides_ = new int[num_outbufs];
+
+ for (int i = 0; i < num_outbufs; ++i) {
+ scanlines_[i] = NULL;
+ scanlines_sizes_[i] = 0;
+ databuf_[i] = NULL;
+ databuf_strides_[i] = 0;
+ }
+
+ num_outbufs_ = num_outbufs;
+ }
+}
+
+void MJpegDecoder::DestroyOutputBuffers() {
+ for (int i = 0; i < num_outbufs_; ++i) {
+ delete[] scanlines_[i];
+ delete[] databuf_[i];
+ }
+ delete[] scanlines_;
+ delete[] databuf_;
+ delete[] scanlines_sizes_;
+ delete[] databuf_strides_;
+ scanlines_ = NULL;
+ databuf_ = NULL;
+ scanlines_sizes_ = NULL;
+ databuf_strides_ = NULL;
+ num_outbufs_ = 0;
+}
+
+// JDCT_IFAST and do_block_smoothing improve performance substantially.
+LIBYUV_BOOL MJpegDecoder::StartDecode() {
+ decompress_struct_->raw_data_out = TRUE;
+ decompress_struct_->dct_method = JDCT_IFAST; // JDCT_ISLOW is default
+ decompress_struct_->dither_mode = JDITHER_NONE;
+ // Not applicable to 'raw':
+ decompress_struct_->do_fancy_upsampling = (boolean)(LIBYUV_FALSE);
+ // Only for buffered mode:
+ decompress_struct_->enable_2pass_quant = (boolean)(LIBYUV_FALSE);
+ // Blocky but fast:
+ decompress_struct_->do_block_smoothing = (boolean)(LIBYUV_FALSE);
+
+ if (!jpeg_start_decompress(decompress_struct_)) {
+ // ERROR: Couldn't start JPEG decompressor";
+ return LIBYUV_FALSE;
+ }
+ return LIBYUV_TRUE;
+}
+
+LIBYUV_BOOL MJpegDecoder::FinishDecode() {
+ // jpeglib considers it an error if we finish without decoding the whole
+ // image, so we call "abort" rather than "finish".
+ jpeg_abort_decompress(decompress_struct_);
+ return LIBYUV_TRUE;
+}
+
+void MJpegDecoder::SetScanlinePointers(uint8_t** data) {
+ for (int i = 0; i < num_outbufs_; ++i) {
+ uint8_t* data_i = data[i];
+ for (int j = 0; j < scanlines_sizes_[i]; ++j) {
+ scanlines_[i][j] = data_i;
+ data_i += GetComponentStride(i);
+ }
+ }
+}
+
+inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
+ return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
+ jpeg_read_raw_data(decompress_struct_, scanlines_,
+ GetImageScanlinesPerImcuRow());
+}
+
+// The helper function which recognizes the jpeg sub-sampling type.
+JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
+ int* subsample_x,
+ int* subsample_y,
+ int number_of_components) {
+ if (number_of_components == 3) { // Color images.
+ if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 &&
+ subsample_y[1] == 2 && subsample_x[2] == 2 && subsample_y[2] == 2) {
+ return kJpegYuv420;
+ }
+ if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 &&
+ subsample_y[1] == 1 && subsample_x[2] == 2 && subsample_y[2] == 1) {
+ return kJpegYuv422;
+ }
+ if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 1 &&
+ subsample_y[1] == 1 && subsample_x[2] == 1 && subsample_y[2] == 1) {
+ return kJpegYuv444;
+ }
+ } else if (number_of_components == 1) { // Grey-scale images.
+ if (subsample_x[0] == 1 && subsample_y[0] == 1) {
+ return kJpegYuv400;
+ }
+ }
+ return kJpegUnknown;
+}
+
+} // namespace libyuv
+#endif // HAVE_JPEG
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_validate.cc b/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_validate.cc
new file mode 100644
index 0000000000..80c2cc0cb9
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_validate.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/mjpeg_decoder.h"
+
+#include <string.h> // For memchr.
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Helper function to scan for EOI marker (0xff 0xd9).
+static LIBYUV_BOOL ScanEOI(const uint8_t* sample, size_t sample_size) {
+ if (sample_size >= 2) {
+ const uint8_t* end = sample + sample_size - 1;
+ const uint8_t* it = sample;
+ while (it < end) {
+ // TODO(fbarchard): scan for 0xd9 instead.
+ it = (const uint8_t*)(memchr(it, 0xff, end - it));
+ if (it == NULL) {
+ break;
+ }
+ if (it[1] == 0xd9) {
+ return LIBYUV_TRUE; // Success: Valid jpeg.
+ }
+ ++it; // Skip over current 0xff.
+ }
+ }
+ // ERROR: Invalid jpeg end code not found. Size sample_size
+ return LIBYUV_FALSE;
+}
+
+// Helper function to validate the jpeg appears intact.
+LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size) {
+ // Maximum size that ValidateJpeg will consider valid.
+ const size_t kMaxJpegSize = 0x7fffffffull;
+ const size_t kBackSearchSize = 1024;
+ if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) {
+ // ERROR: Invalid jpeg size: sample_size
+ return LIBYUV_FALSE;
+ }
+ if (sample[0] != 0xff || sample[1] != 0xd8) { // SOI marker
+ // ERROR: Invalid jpeg initial start code
+ return LIBYUV_FALSE;
+ }
+
+ // Look for the End Of Image (EOI) marker near the end of the buffer.
+ if (sample_size > kBackSearchSize) {
+ if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) {
+ return LIBYUV_TRUE; // Success: Valid jpeg.
+ }
+ // Reduce search size for forward search.
+ sample_size = sample_size - kBackSearchSize + 1;
+ }
+ // Step over SOI marker and scan for EOI.
+ return ScanEOI(sample + 2, sample_size - 2);
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/planar_functions.cc b/media/libvpx/libvpx/third_party/libyuv/source/planar_functions.cc
new file mode 100644
index 0000000000..5eae3f763a
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/planar_functions.cc
@@ -0,0 +1,3587 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/planar_functions.h"
+
+#include <string.h> // for memset()
+
+#include "libyuv/cpu_id.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h" // for ScaleRowDown2
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Copy a plane of data
+LIBYUV_API
+void CopyPlane(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ int y;
+ void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y = dst_stride_y = 0;
+ }
+ // Nothing to do.
+ if (src_y == dst_y && src_stride_y == dst_stride_y) {
+ return;
+ }
+
+#if defined(HAS_COPYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
+ }
+#endif
+#if defined(HAS_COPYROW_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
+ }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+ if (TestCpuFlag(kCpuHasERMS)) {
+ CopyRow = CopyRow_ERMS;
+ }
+#endif
+#if defined(HAS_COPYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
+ }
+#endif
+
+ // Copy plane
+ for (y = 0; y < height; ++y) {
+ CopyRow(src_y, dst_y, width);
+ src_y += src_stride_y;
+ dst_y += dst_stride_y;
+ }
+}
+
+// TODO(fbarchard): Consider support for negative height.
+// TODO(fbarchard): Consider stride measured in bytes.
+LIBYUV_API
+void CopyPlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ int y;
+ void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C;
+ // Coalesce rows.
+ if (src_stride_y == width && dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y = dst_stride_y = 0;
+ }
+#if defined(HAS_COPYROW_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
+ CopyRow = CopyRow_16_SSE2;
+ }
+#endif
+#if defined(HAS_COPYROW_16_ERMS)
+ if (TestCpuFlag(kCpuHasERMS)) {
+ CopyRow = CopyRow_16_ERMS;
+ }
+#endif
+#if defined(HAS_COPYROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
+ CopyRow = CopyRow_16_NEON;
+ }
+#endif
+
+ // Copy plane
+ for (y = 0; y < height; ++y) {
+ CopyRow(src_y, dst_y, width);
+ src_y += src_stride_y;
+ dst_y += dst_stride_y;
+ }
+}
+
+// Convert a plane of 16 bit data to 8 bit
+LIBYUV_API
+void Convert16To8Plane(const uint16_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int scale, // 16384 for 10 bits
+ int width,
+ int height) {
+ int y;
+ void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale,
+ int width) = Convert16To8Row_C;
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y = dst_stride_y = 0;
+ }
+#if defined(HAS_CONVERT16TO8ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ Convert16To8Row = Convert16To8Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ Convert16To8Row = Convert16To8Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_CONVERT16TO8ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ Convert16To8Row = Convert16To8Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ Convert16To8Row = Convert16To8Row_AVX2;
+ }
+ }
+#endif
+
+ // Convert plane
+ for (y = 0; y < height; ++y) {
+ Convert16To8Row(src_y, dst_y, scale, width);
+ src_y += src_stride_y;
+ dst_y += dst_stride_y;
+ }
+}
+
+// Convert a plane of 8 bit data to 16 bit
+LIBYUV_API
+void Convert8To16Plane(const uint8_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int scale, // 16384 for 10 bits
+ int width,
+ int height) {
+ int y;
+ void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale,
+ int width) = Convert8To16Row_C;
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y = dst_stride_y = 0;
+ }
+#if defined(HAS_CONVERT8TO16ROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ Convert8To16Row = Convert8To16Row_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ Convert8To16Row = Convert8To16Row_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_CONVERT8TO16ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ Convert8To16Row = Convert8To16Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ Convert8To16Row = Convert8To16Row_AVX2;
+ }
+ }
+#endif
+
+ // Convert plane
+ for (y = 0; y < height; ++y) {
+ Convert8To16Row(src_y, dst_y, scale, width);
+ src_y += src_stride_y;
+ dst_y += dst_stride_y;
+ }
+}
+
+// Copy I422.
+LIBYUV_API
+int I422Copy(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int halfwidth = (width + 1) >> 1;
+ if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ if (dst_y) {
+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+ CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
+ CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
+ return 0;
+}
+
+// Copy I444.
+LIBYUV_API
+int I444Copy(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ if (dst_y) {
+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+ CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+ return 0;
+}
+
+// Copy I400.
+LIBYUV_API
+int I400ToI400(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ if (!src_y || !dst_y || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_stride_y = -src_stride_y;
+ }
+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ return 0;
+}
+
+// Convert I420 to I400.
+LIBYUV_API
+int I420ToI400(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ (void)src_u;
+ (void)src_stride_u;
+ (void)src_v;
+ (void)src_stride_v;
+ if (!src_y || !dst_y || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_stride_y = -src_stride_y;
+ }
+
+ CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ return 0;
+}
+
+// Support function for NV12 etc UV channels.
+// Width and height are plane sizes (typically half pixel width).
+LIBYUV_API
+void SplitUVPlane(const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
+ int width) = SplitUVRow_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_u = dst_u + (height - 1) * dst_stride_u;
+ dst_v = dst_v + (height - 1) * dst_stride_v;
+ dst_stride_u = -dst_stride_u;
+ dst_stride_v = -dst_stride_v;
+ }
+ // Coalesce rows.
+ if (src_stride_uv == width * 2 && dst_stride_u == width &&
+ dst_stride_v == width) {
+ width *= height;
+ height = 1;
+ src_stride_uv = dst_stride_u = dst_stride_v = 0;
+ }
+#if defined(HAS_SPLITUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SplitUVRow = SplitUVRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ SplitUVRow = SplitUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SplitUVRow = SplitUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ SplitUVRow = SplitUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ // Copy a row of UV.
+ SplitUVRow(src_uv, dst_u, dst_v, width);
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ src_uv += src_stride_uv;
+ }
+}
+
+LIBYUV_API
+void MergeUVPlane(const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ int y;
+ void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
+ uint8_t* dst_uv, int width) = MergeUVRow_C;
+ // Coalesce rows.
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_uv = dst_uv + (height - 1) * dst_stride_uv;
+ dst_stride_uv = -dst_stride_uv;
+ }
+ // Coalesce rows.
+ if (src_stride_u == width && src_stride_v == width &&
+ dst_stride_uv == width * 2) {
+ width *= height;
+ height = 1;
+ src_stride_u = src_stride_v = dst_stride_uv = 0;
+ }
+#if defined(HAS_MERGEUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ MergeUVRow = MergeUVRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ MergeUVRow = MergeUVRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MergeUVRow = MergeUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ MergeUVRow = MergeUVRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeUVRow = MergeUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ MergeUVRow = MergeUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_MERGEUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ MergeUVRow = MergeUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ MergeUVRow = MergeUVRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ // Merge a row of U and V into a row of UV.
+ MergeUVRow(src_u, src_v, dst_uv, width);
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ dst_uv += dst_stride_uv;
+ }
+}
+
+// Support function for NV12 etc RGB channels.
+// Width and height are plane sizes (typically half pixel width).
+LIBYUV_API
+void SplitRGBPlane(const uint8_t* src_rgb,
+ int src_stride_rgb,
+ uint8_t* dst_r,
+ int dst_stride_r,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
+ int y;
+ void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
+ uint8_t* dst_b, int width) = SplitRGBRow_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_r = dst_r + (height - 1) * dst_stride_r;
+ dst_g = dst_g + (height - 1) * dst_stride_g;
+ dst_b = dst_b + (height - 1) * dst_stride_b;
+ dst_stride_r = -dst_stride_r;
+ dst_stride_g = -dst_stride_g;
+ dst_stride_b = -dst_stride_b;
+ }
+ // Coalesce rows.
+ if (src_stride_rgb == width * 3 && dst_stride_r == width &&
+ dst_stride_g == width && dst_stride_b == width) {
+ width *= height;
+ height = 1;
+ src_stride_rgb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
+ }
+#if defined(HAS_SPLITRGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ SplitRGBRow = SplitRGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ SplitRGBRow = SplitRGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_SPLITRGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SplitRGBRow = SplitRGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SplitRGBRow = SplitRGBRow_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ // Copy a row of RGB.
+ SplitRGBRow(src_rgb, dst_r, dst_g, dst_b, width);
+ dst_r += dst_stride_r;
+ dst_g += dst_stride_g;
+ dst_b += dst_stride_b;
+ src_rgb += src_stride_rgb;
+ }
+}
+
+LIBYUV_API
+void MergeRGBPlane(const uint8_t* src_r,
+ int src_stride_r,
+ const uint8_t* src_g,
+ int src_stride_g,
+ const uint8_t* src_b,
+ int src_stride_b,
+ uint8_t* dst_rgb,
+ int dst_stride_rgb,
+ int width,
+ int height) {
+ int y;
+ void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
+ const uint8_t* src_b, uint8_t* dst_rgb, int width) =
+ MergeRGBRow_C;
+ // Coalesce rows.
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
+ dst_stride_rgb = -dst_stride_rgb;
+ }
+ // Coalesce rows.
+ if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
+ dst_stride_rgb == width * 3) {
+ width *= height;
+ height = 1;
+ src_stride_r = src_stride_g = src_stride_b = dst_stride_rgb = 0;
+ }
+#if defined(HAS_MERGERGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ MergeRGBRow = MergeRGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ MergeRGBRow = MergeRGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_MERGERGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MergeRGBRow = MergeRGBRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ MergeRGBRow = MergeRGBRow_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ // Merge a row of U and V into a row of RGB.
+ MergeRGBRow(src_r, src_g, src_b, dst_rgb, width);
+ src_r += src_stride_r;
+ src_g += src_stride_g;
+ src_b += src_stride_b;
+ dst_rgb += dst_stride_rgb;
+ }
+}
+
+// Mirror a plane of data.
+void MirrorPlane(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ int y;
+ void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_stride_y = -src_stride_y;
+ }
+#if defined(HAS_MIRRORROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MirrorRow = MirrorRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_MIRRORROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ MirrorRow = MirrorRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_MIRRORROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MirrorRow = MirrorRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_MIRRORROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ MirrorRow = MirrorRow_Any_MSA;
+ if (IS_ALIGNED(width, 64)) {
+ MirrorRow = MirrorRow_MSA;
+ }
+ }
+#endif
+
+ // Mirror plane
+ for (y = 0; y < height; ++y) {
+ MirrorRow(src_y, dst_y, width);
+ src_y += src_stride_y;
+ dst_y += dst_stride_y;
+ }
+}
+
+// Convert YUY2 to I422.
+LIBYUV_API
+int YUY2ToI422(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
+ uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
+ void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
+ YUY2ToYRow_C;
+ if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+ src_stride_yuy2 = -src_stride_yuy2;
+ }
+ // Coalesce rows.
+ if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
+ dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
+ width * height <= 32768) {
+ width *= height;
+ height = 1;
+ src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+ }
+#if defined(HAS_YUY2TOYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
+ YUY2ToYRow = YUY2ToYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
+ YUY2ToYRow = YUY2ToYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
+ YUY2ToYRow = YUY2ToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
+ YUY2ToYRow = YUY2ToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ YUY2ToYRow = YUY2ToYRow_Any_NEON;
+ YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_NEON;
+ YUY2ToUV422Row = YUY2ToUV422Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ YUY2ToYRow = YUY2ToYRow_Any_MSA;
+ YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ YUY2ToYRow = YUY2ToYRow_MSA;
+ YUY2ToUV422Row = YUY2ToUV422Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ src_yuy2 += src_stride_yuy2;
+ dst_y += dst_stride_y;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ return 0;
+}
+
+// Convert UYVY to I422.
+LIBYUV_API
+int UYVYToI422(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
+ uint8_t* dst_v, int width) = UYVYToUV422Row_C;
+ void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
+ UYVYToYRow_C;
+ if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
+ src_stride_uyvy = -src_stride_uyvy;
+ }
+ // Coalesce rows.
+ if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
+ dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
+ width * height <= 32768) {
+ width *= height;
+ height = 1;
+ src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+ }
+#if defined(HAS_UYVYTOYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
+ UYVYToYRow = UYVYToYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToUV422Row = UYVYToUV422Row_SSE2;
+ UYVYToYRow = UYVYToYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_UYVYTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
+ UYVYToYRow = UYVYToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ UYVYToUV422Row = UYVYToUV422Row_AVX2;
+ UYVYToYRow = UYVYToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_UYVYTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ UYVYToYRow = UYVYToYRow_Any_NEON;
+ UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToYRow = UYVYToYRow_NEON;
+ UYVYToUV422Row = UYVYToUV422Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_UYVYTOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ UYVYToYRow = UYVYToYRow_Any_MSA;
+ UYVYToUV422Row = UYVYToUV422Row_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ UYVYToYRow = UYVYToYRow_MSA;
+ UYVYToUV422Row = UYVYToUV422Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
+ UYVYToYRow(src_uyvy, dst_y, width);
+ src_uyvy += src_stride_uyvy;
+ dst_y += dst_stride_y;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ return 0;
+}
+
+// Convert YUY2 to Y.
+LIBYUV_API
+int YUY2ToY(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ int y;
+ void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
+ YUY2ToYRow_C;
+ if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+ src_stride_yuy2 = -src_stride_yuy2;
+ }
+ // Coalesce rows.
+ if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_yuy2 = dst_stride_y = 0;
+ }
+#if defined(HAS_YUY2TOYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ YUY2ToYRow = YUY2ToYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ YUY2ToYRow = YUY2ToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ YUY2ToYRow = YUY2ToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ YUY2ToYRow = YUY2ToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_YUY2TOYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ YUY2ToYRow = YUY2ToYRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ YUY2ToYRow = YUY2ToYRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ src_yuy2 += src_stride_yuy2;
+ dst_y += dst_stride_y;
+ }
+ return 0;
+}
+
+// Mirror I400 with optional flipping
+LIBYUV_API
+int I400Mirror(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ if (!src_y || !dst_y || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_stride_y = -src_stride_y;
+ }
+
+ MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ return 0;
+}
+
+// Mirror I420 with optional flipping
+LIBYUV_API
+int I420Mirror(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (halfheight - 1) * src_stride_u;
+ src_v = src_v + (halfheight - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ if (dst_y) {
+ MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+ MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+ MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+ return 0;
+}
+
+// ARGB mirror.
+LIBYUV_API
+int ARGBMirror(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
+ ARGBMirrorRow_C;
+ if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+#if defined(HAS_ARGBMIRRORROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMirrorRow = ARGBMirrorRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMIRRORROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMirrorRow = ARGBMirrorRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMIRRORROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMIRRORROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBMirrorRow = ARGBMirrorRow_MSA;
+ }
+ }
+#endif
+
+ // Mirror plane
+ for (y = 0; y < height; ++y) {
+ ARGBMirrorRow(src_argb, dst_argb, width);
+ src_argb += src_stride_argb;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Get a blender that optimized for the CPU and pixel count.
+// As there are 6 blenders to choose from, the caller should try to use
+// the same blend function for all pixels if possible.
+LIBYUV_API
+ARGBBlendRow GetARGBBlend() {
+ void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
+ uint8_t* dst_argb, int width) = ARGBBlendRow_C;
+#if defined(HAS_ARGBBLENDROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBBlendRow = ARGBBlendRow_SSSE3;
+ return ARGBBlendRow;
+ }
+#endif
+#if defined(HAS_ARGBBLENDROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBBlendRow = ARGBBlendRow_NEON;
+ }
+#endif
+#if defined(HAS_ARGBBLENDROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBBlendRow = ARGBBlendRow_MSA;
+ }
+#endif
+ return ARGBBlendRow;
+}
+
+// Alpha Blend 2 ARGB images and store to destination.
+LIBYUV_API
+int ARGBBlend(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
+ uint8_t* dst_argb, int width) = GetARGBBlend();
+ if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
+ dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
+ }
+
+ for (y = 0; y < height; ++y) {
+ ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
+ src_argb0 += src_stride_argb0;
+ src_argb1 += src_stride_argb1;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Alpha Blend plane and store to destination.
+LIBYUV_API
+int BlendPlane(const uint8_t* src_y0,
+ int src_stride_y0,
+ const uint8_t* src_y1,
+ int src_stride_y1,
+ const uint8_t* alpha,
+ int alpha_stride,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ int y;
+ void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
+ const uint8_t* alpha, uint8_t* dst, int width) =
+ BlendPlaneRow_C;
+ if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+
+ // Coalesce rows for Y plane.
+ if (src_stride_y0 == width && src_stride_y1 == width &&
+ alpha_stride == width && dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
+ }
+
+#if defined(HAS_BLENDPLANEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ BlendPlaneRow = BlendPlaneRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_BLENDPLANEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ BlendPlaneRow = BlendPlaneRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ BlendPlaneRow = BlendPlaneRow_AVX2;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
+ src_y0 += src_stride_y0;
+ src_y1 += src_stride_y1;
+ alpha += alpha_stride;
+ dst_y += dst_stride_y;
+ }
+ return 0;
+}
+
+#define MAXTWIDTH 2048
+// Alpha Blend YUV images and store to destination.
+LIBYUV_API
+int I420Blend(const uint8_t* src_y0,
+ int src_stride_y0,
+ const uint8_t* src_u0,
+ int src_stride_u0,
+ const uint8_t* src_v0,
+ int src_stride_v0,
+ const uint8_t* src_y1,
+ int src_stride_y1,
+ const uint8_t* src_u1,
+ int src_stride_u1,
+ const uint8_t* src_v1,
+ int src_stride_v1,
+ const uint8_t* alpha,
+ int alpha_stride,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+ // Half width/height for UV.
+ int halfwidth = (width + 1) >> 1;
+ void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
+ const uint8_t* alpha, uint8_t* dst, int width) =
+ BlendPlaneRow_C;
+ void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
+ if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
+ !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+
+ // Blend Y plane.
+ BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
+ dst_y, dst_stride_y, width, height);
+
+#if defined(HAS_BLENDPLANEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
+ if (IS_ALIGNED(halfwidth, 8)) {
+ BlendPlaneRow = BlendPlaneRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_BLENDPLANEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ BlendPlaneRow = BlendPlaneRow_Any_AVX2;
+ if (IS_ALIGNED(halfwidth, 32)) {
+ BlendPlaneRow = BlendPlaneRow_AVX2;
+ }
+ }
+#endif
+ if (!IS_ALIGNED(width, 2)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
+ }
+#if defined(HAS_SCALEROWDOWN2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
+ if (IS_ALIGNED(width, 2)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ ScaleRowDown2 = ScaleRowDown2Box_NEON;
+ }
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
+ if (IS_ALIGNED(width, 2)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
+ if (IS_ALIGNED(halfwidth, 16)) {
+ ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
+ }
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
+ if (IS_ALIGNED(width, 2)) {
+ ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
+ if (IS_ALIGNED(halfwidth, 32)) {
+ ScaleRowDown2 = ScaleRowDown2Box_AVX2;
+ }
+ }
+ }
+#endif
+
+ // Row buffer for intermediate alpha pixels.
+ align_buffer_64(halfalpha, halfwidth);
+ for (y = 0; y < height; y += 2) {
+ // last row of odd height image use 1 row of alpha instead of 2.
+ if (y == (height - 1)) {
+ alpha_stride = 0;
+ }
+ // Subsample 2 rows of UV to half width and half height.
+ ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
+ alpha += alpha_stride * 2;
+ BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
+ BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
+ src_u0 += src_stride_u0;
+ src_u1 += src_stride_u1;
+ dst_u += dst_stride_u;
+ src_v0 += src_stride_v0;
+ src_v1 += src_stride_v1;
+ dst_v += dst_stride_v;
+ }
+ free_aligned_buffer_64(halfalpha);
+ return 0;
+}
+
+// Multiply 2 ARGB images and store to destination.
+LIBYUV_API
+int ARGBMultiply(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
+ uint8_t* dst, int width) = ARGBMultiplyRow_C;
+ if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
+ dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBMULTIPLYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMULTIPLYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMULTIPLYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMULTIPLYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_MSA;
+ }
+ }
+#endif
+
+ // Multiply plane
+ for (y = 0; y < height; ++y) {
+ ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
+ src_argb0 += src_stride_argb0;
+ src_argb1 += src_stride_argb1;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Add 2 ARGB images and store to destination.
+LIBYUV_API
+int ARGBAdd(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
+ int width) = ARGBAddRow_C;
+ if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
+ dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__))
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBAddRow = ARGBAddRow_SSE2;
+ }
+#endif
+#if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__))
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBAddRow = ARGBAddRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBAddRow = ARGBAddRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBADDROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBAddRow = ARGBAddRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAddRow = ARGBAddRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBADDROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBAddRow = ARGBAddRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAddRow = ARGBAddRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBADDROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBAddRow = ARGBAddRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAddRow = ARGBAddRow_MSA;
+ }
+ }
+#endif
+
+ // Add plane
+ for (y = 0; y < height; ++y) {
+ ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
+ src_argb0 += src_stride_argb0;
+ src_argb1 += src_stride_argb1;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Subtract 2 ARGB images and store to destination.
+LIBYUV_API
+int ARGBSubtract(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
+ uint8_t* dst, int width) = ARGBSubtractRow_C;
+ if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
+ dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBSUBTRACTROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBSubtractRow = ARGBSubtractRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBSUBTRACTROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBSubtractRow = ARGBSubtractRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBSUBTRACTROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBSubtractRow = ARGBSubtractRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBSUBTRACTROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBSubtractRow = ARGBSubtractRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBSubtractRow = ARGBSubtractRow_MSA;
+ }
+ }
+#endif
+
+ // Subtract plane
+ for (y = 0; y < height; ++y) {
+ ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
+ src_argb0 += src_stride_argb0;
+ src_argb1 += src_stride_argb1;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+// Convert I422 to RGBA with matrix
+static int I422ToRGBAMatrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgba,
+ int dst_stride_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToRGBARow_C;
+ if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
+ dst_stride_rgba = -dst_stride_rgba;
+ }
+#if defined(HAS_I422TORGBAROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGBARow = I422ToRGBARow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGBAROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGBARow = I422ToRGBARow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGBARow = I422ToRGBARow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGBAROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToRGBARow = I422ToRGBARow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGBARow = I422ToRGBARow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGBAROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToRGBARow = I422ToRGBARow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGBARow = I422ToRGBARow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
+ dst_rgba += dst_stride_rgba;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ return 0;
+}
+
+// Convert I422 to RGBA.
+LIBYUV_API
+int I422ToRGBA(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgba,
+ int dst_stride_rgba,
+ int width,
+ int height) {
+ return I422ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgba, dst_stride_rgba,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I422 to BGRA.
+LIBYUV_API
+int I422ToBGRA(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_bgra,
+ int dst_stride_bgra,
+ int width,
+ int height) {
+ return I422ToRGBAMatrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_bgra, dst_stride_bgra,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
+// Convert NV12 to RGB565.
+LIBYUV_API
+int NV12ToRGB565(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ int width,
+ int height) {
+ int y;
+ void (*NV12ToRGB565Row)(
+ const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C;
+ if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+ dst_stride_rgb565 = -dst_stride_rgb565;
+ }
+#if defined(HAS_NV12TORGB565ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_NV12TORGB565ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_NV12TORGB565ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ NV12ToRGB565Row = NV12ToRGB565Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_NV12TORGB565ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ NV12ToRGB565Row = NV12ToRGB565Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ NV12ToRGB565Row = NV12ToRGB565Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width);
+ dst_rgb565 += dst_stride_rgb565;
+ src_y += src_stride_y;
+ if (y & 1) {
+ src_uv += src_stride_uv;
+ }
+ }
+ return 0;
+}
+
+// Convert RAW to RGB24.
+LIBYUV_API
+int RAWToRGB24(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ int y;
+ void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) =
+ RAWToRGB24Row_C;
+ if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_raw = src_raw + (height - 1) * src_stride_raw;
+ src_stride_raw = -src_stride_raw;
+ }
+ // Coalesce rows.
+ if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
+ width *= height;
+ height = 1;
+ src_stride_raw = dst_stride_rgb24 = 0;
+ }
+#if defined(HAS_RAWTORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ RAWToRGB24Row = RAWToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_RAWTORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RAWToRGB24Row = RAWToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_RAWTORGB24ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RAWToRGB24Row = RAWToRGB24Row_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToRGB24Row = RAWToRGB24Row_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ RAWToRGB24Row(src_raw, dst_rgb24, width);
+ src_raw += src_stride_raw;
+ dst_rgb24 += dst_stride_rgb24;
+ }
+ return 0;
+}
+
+LIBYUV_API
+void SetPlane(uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ uint32_t value) {
+ int y;
+ void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+ // Coalesce rows.
+ if (dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ dst_stride_y = 0;
+ }
+#if defined(HAS_SETROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SetRow = SetRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SetRow = SetRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SETROW_X86)
+ if (TestCpuFlag(kCpuHasX86)) {
+ SetRow = SetRow_Any_X86;
+ if (IS_ALIGNED(width, 4)) {
+ SetRow = SetRow_X86;
+ }
+ }
+#endif
+#if defined(HAS_SETROW_ERMS)
+ if (TestCpuFlag(kCpuHasERMS)) {
+ SetRow = SetRow_ERMS;
+ }
+#endif
+#if defined(HAS_SETROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 16)) {
+ SetRow = SetRow_MSA;
+ }
+#endif
+
+ // Set plane
+ for (y = 0; y < height; ++y) {
+ SetRow(dst_y, value, width);
+ dst_y += dst_stride_y;
+ }
+}
+
+// Draw a rectangle into I420
+LIBYUV_API
+int I420Rect(uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int x,
+ int y,
+ int width,
+ int height,
+ int value_y,
+ int value_u,
+ int value_v) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ uint8_t* start_y = dst_y + y * dst_stride_y + x;
+ uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
+ uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
+ if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
+ y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
+ value_v < 0 || value_v > 255) {
+ return -1;
+ }
+
+ SetPlane(start_y, dst_stride_y, width, height, value_y);
+ SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
+ SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
+ return 0;
+}
+
+// Draw a rectangle into ARGB
+LIBYUV_API
+int ARGBRect(uint8_t* dst_argb,
+ int dst_stride_argb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height,
+ uint32_t value) {
+ int y;
+ void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
+ ARGBSetRow_C;
+ if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+ dst_argb += dst_y * dst_stride_argb + dst_x * 4;
+ // Coalesce rows.
+ if (dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ dst_stride_argb = 0;
+ }
+
+#if defined(HAS_ARGBSETROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBSetRow = ARGBSetRow_Any_NEON;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBSetRow = ARGBSetRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBSETROW_X86)
+ if (TestCpuFlag(kCpuHasX86)) {
+ ARGBSetRow = ARGBSetRow_X86;
+ }
+#endif
+#if defined(HAS_ARGBSETROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBSetRow = ARGBSetRow_Any_MSA;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBSetRow = ARGBSetRow_MSA;
+ }
+ }
+#endif
+
+ // Set plane
+ for (y = 0; y < height; ++y) {
+ ARGBSetRow(dst_argb, value, width);
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert unattentuated ARGB to preattenuated ARGB.
+// An unattenutated ARGB alpha blend uses the formula
+// p = a * f + (1 - a) * b
+// where
+// p is output pixel
+// f is foreground pixel
+// b is background pixel
+// a is alpha value from foreground pixel
+// An preattenutated ARGB alpha blend uses the formula
+// p = f + (1 - a) * b
+// where
+// f is foreground pixel premultiplied by alpha
+
+LIBYUV_API
+int ARGBAttenuate(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+ int width) = ARGBAttenuateRow_C;
+ if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBATTENUATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBAttenuateRow(src_argb, dst_argb, width);
+ src_argb += src_stride_argb;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert preattentuated ARGB to unattenuated ARGB.
+LIBYUV_API
+int ARGBUnattenuate(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+ int width) = ARGBUnattenuateRow_C;
+ if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
+ }
+ }
+#endif
+ // TODO(fbarchard): Neon version.
+
+ for (y = 0; y < height; ++y) {
+ ARGBUnattenuateRow(src_argb, dst_argb, width);
+ src_argb += src_stride_argb;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert ARGB to Grayed ARGB.
+LIBYUV_API
+int ARGBGrayTo(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
+ ARGBGrayRow_C;
+ if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBGRAYROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_SSSE3;
+ }
+#endif
+#if defined(HAS_ARGBGRAYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_NEON;
+ }
+#endif
+#if defined(HAS_ARGBGRAYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_MSA;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBGrayRow(src_argb, dst_argb, width);
+ src_argb += src_stride_argb;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Make a rectangle of ARGB gray scale.
+LIBYUV_API
+int ARGBGray(uint8_t* dst_argb,
+ int dst_stride_argb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
+ ARGBGrayRow_C;
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
+ return -1;
+ }
+ // Coalesce rows.
+ if (dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBGRAYROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_SSSE3;
+ }
+#endif
+#if defined(HAS_ARGBGRAYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_NEON;
+ }
+#endif
+#if defined(HAS_ARGBGRAYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_MSA;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBGrayRow(dst, dst, width);
+ dst += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Make a rectangle of ARGB Sepia tone.
+LIBYUV_API
+int ARGBSepia(uint8_t* dst_argb,
+ int dst_stride_argb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
+ return -1;
+ }
+ // Coalesce rows.
+ if (dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBSEPIAROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
+ ARGBSepiaRow = ARGBSepiaRow_SSSE3;
+ }
+#endif
+#if defined(HAS_ARGBSEPIAROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+ ARGBSepiaRow = ARGBSepiaRow_NEON;
+ }
+#endif
+#if defined(HAS_ARGBSEPIAROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
+ ARGBSepiaRow = ARGBSepiaRow_MSA;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBSepiaRow(dst, width);
+ dst += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Apply a 4x4 matrix to each ARGB pixel.
+// Note: Normally for shading, but can be used to swizzle or invert.
+LIBYUV_API
+int ARGBColorMatrix(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const int8_t* matrix_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+ const int8_t* matrix_argb, int width) =
+ ARGBColorMatrixRow_C;
+ if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
+ ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
+ }
+#endif
+#if defined(HAS_ARGBCOLORMATRIXROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+ ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
+ }
+#endif
+#if defined(HAS_ARGBCOLORMATRIXROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
+ ARGBColorMatrixRow = ARGBColorMatrixRow_MSA;
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
+ src_argb += src_stride_argb;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Apply a 4x3 matrix to each ARGB pixel.
+// Deprecated.
+LIBYUV_API
+int RGBColorMatrix(uint8_t* dst_argb,
+ int dst_stride_argb,
+ const int8_t* matrix_rgb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height) {
+ SIMD_ALIGNED(int8_t matrix_argb[16]);
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
+ dst_y < 0) {
+ return -1;
+ }
+
+ // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
+ matrix_argb[0] = matrix_rgb[0] / 2;
+ matrix_argb[1] = matrix_rgb[1] / 2;
+ matrix_argb[2] = matrix_rgb[2] / 2;
+ matrix_argb[3] = matrix_rgb[3] / 2;
+ matrix_argb[4] = matrix_rgb[4] / 2;
+ matrix_argb[5] = matrix_rgb[5] / 2;
+ matrix_argb[6] = matrix_rgb[6] / 2;
+ matrix_argb[7] = matrix_rgb[7] / 2;
+ matrix_argb[8] = matrix_rgb[8] / 2;
+ matrix_argb[9] = matrix_rgb[9] / 2;
+ matrix_argb[10] = matrix_rgb[10] / 2;
+ matrix_argb[11] = matrix_rgb[11] / 2;
+ matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
+ matrix_argb[15] = 64; // 1.0
+
+ return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst,
+ dst_stride_argb, &matrix_argb[0], width, height);
+}
+
+// Apply a color table each ARGB pixel.
+// Table contains 256 ARGB values.
+LIBYUV_API
+int ARGBColorTable(uint8_t* dst_argb,
+ int dst_stride_argb,
+ const uint8_t* table_argb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
+ int width) = ARGBColorTableRow_C;
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
+ dst_y < 0) {
+ return -1;
+ }
+ // Coalesce rows.
+ if (dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBCOLORTABLEROW_X86)
+ if (TestCpuFlag(kCpuHasX86)) {
+ ARGBColorTableRow = ARGBColorTableRow_X86;
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ ARGBColorTableRow(dst, table_argb, width);
+ dst += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Apply a color table each ARGB pixel but preserve destination alpha.
+// Table contains 256 ARGB values.
+LIBYUV_API
+int RGBColorTable(uint8_t* dst_argb,
+ int dst_stride_argb,
+ const uint8_t* table_argb,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height) {
+ int y;
+ void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
+ int width) = RGBColorTableRow_C;
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
+ dst_y < 0) {
+ return -1;
+ }
+ // Coalesce rows.
+ if (dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ dst_stride_argb = 0;
+ }
+#if defined(HAS_RGBCOLORTABLEROW_X86)
+ if (TestCpuFlag(kCpuHasX86)) {
+ RGBColorTableRow = RGBColorTableRow_X86;
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ RGBColorTableRow(dst, table_argb, width);
+ dst += dst_stride_argb;
+ }
+ return 0;
+}
+
+// ARGBQuantize is used to posterize art.
+// e.g. rgb / qvalue * qvalue + qvalue / 2
+// But the low levels implement efficiently with 3 parameters, and could be
+// used for other high level operations.
+// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
+// where scale is 1 / interval_size as a fixed point value.
+// The divide is replaces with a multiply by reciprocal fixed point multiply.
+// Caveat - although SSE2 saturates, the C function does not and should be used
+// with care if doing anything but quantization.
+LIBYUV_API
+int ARGBQuantize(uint8_t* dst_argb,
+ int dst_stride_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
+ int interval_offset, int width) = ARGBQuantizeRow_C;
+ uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+ if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
+ interval_size < 1 || interval_size > 255) {
+ return -1;
+ }
+ // Coalesce rows.
+ if (dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBQUANTIZEROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
+ ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
+ }
+#endif
+#if defined(HAS_ARGBQUANTIZEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+ ARGBQuantizeRow = ARGBQuantizeRow_NEON;
+ }
+#endif
+#if defined(HAS_ARGBQUANTIZEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
+ ARGBQuantizeRow = ARGBQuantizeRow_MSA;
+ }
+#endif
+ for (y = 0; y < height; ++y) {
+ ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
+ dst += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Computes table of cumulative sum for image where the value is the sum
+// of all values above and to the left of the entry. Used by ARGBBlur.
+LIBYUV_API
+int ARGBComputeCumulativeSum(const uint8_t* src_argb,
+ int src_stride_argb,
+ int32_t* dst_cumsum,
+ int dst_stride32_cumsum,
+ int width,
+ int height) {
+ int y;
+ void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
+ const int32_t* previous_cumsum, int width) =
+ ComputeCumulativeSumRow_C;
+ int32_t* previous_cumsum = dst_cumsum;
+ if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
+ return -1;
+ }
+#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
+ }
+#endif
+ memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel.
+ for (y = 0; y < height; ++y) {
+ ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
+ previous_cumsum = dst_cumsum;
+ dst_cumsum += dst_stride32_cumsum;
+ src_argb += src_stride_argb;
+ }
+ return 0;
+}
+
+// Blur ARGB image.
+// Caller should allocate CumulativeSum table of width * height * 16 bytes
+// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
+// as the buffer is treated as circular.
+LIBYUV_API
+int ARGBBlur(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int32_t* dst_cumsum,
+ int dst_stride32_cumsum,
+ int width,
+ int height,
+ int radius) {
+ int y;
+ void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
+ const int32_t* previous_cumsum, int width) =
+ ComputeCumulativeSumRow_C;
+ void (*CumulativeSumToAverageRow)(
+ const int32_t* topleft, const int32_t* botleft, int width, int area,
+ uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
+ int32_t* cumsum_bot_row;
+ int32_t* max_cumsum_bot_row;
+ int32_t* cumsum_top_row;
+
+ if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ if (radius > height) {
+ radius = height;
+ }
+ if (radius > (width / 2 - 1)) {
+ radius = width / 2 - 1;
+ }
+ if (radius <= 0) {
+ return -1;
+ }
+#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
+ CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
+ }
+#endif
+ // Compute enough CumulativeSum for first row to be blurred. After this
+ // one row of CumulativeSum is updated at a time.
+ ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
+ dst_stride32_cumsum, width, radius);
+
+ src_argb = src_argb + radius * src_stride_argb;
+ cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
+
+ max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
+ cumsum_top_row = &dst_cumsum[0];
+
+ for (y = 0; y < height; ++y) {
+ int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
+ int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
+ int area = radius * (bot_y - top_y);
+ int boxwidth = radius * 4;
+ int x;
+ int n;
+
+ // Increment cumsum_top_row pointer with circular buffer wrap around.
+ if (top_y) {
+ cumsum_top_row += dst_stride32_cumsum;
+ if (cumsum_top_row >= max_cumsum_bot_row) {
+ cumsum_top_row = dst_cumsum;
+ }
+ }
+ // Increment cumsum_bot_row pointer with circular buffer wrap around and
+ // then fill in a row of CumulativeSum.
+ if ((y + radius) < height) {
+ const int32_t* prev_cumsum_bot_row = cumsum_bot_row;
+ cumsum_bot_row += dst_stride32_cumsum;
+ if (cumsum_bot_row >= max_cumsum_bot_row) {
+ cumsum_bot_row = dst_cumsum;
+ }
+ ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
+ width);
+ src_argb += src_stride_argb;
+ }
+
+ // Left clipped.
+ for (x = 0; x < radius + 1; ++x) {
+ CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
+ &dst_argb[x * 4], 1);
+ area += (bot_y - top_y);
+ boxwidth += 4;
+ }
+
+ // Middle unclipped.
+ n = (width - 1) - radius - x + 1;
+ CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
+ &dst_argb[x * 4], n);
+
+ // Right clipped.
+ for (x += n; x <= width - 1; ++x) {
+ area -= (bot_y - top_y);
+ boxwidth -= 4;
+ CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
+ cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
+ area, &dst_argb[x * 4], 1);
+ }
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Multiply ARGB image by a specified ARGB value.
+LIBYUV_API
+int ARGBShade(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height,
+ uint32_t value) {
+ int y;
+ void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width,
+ uint32_t value) = ARGBShadeRow_C;
+ if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBSHADEROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
+ ARGBShadeRow = ARGBShadeRow_SSE2;
+ }
+#endif
+#if defined(HAS_ARGBSHADEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+ ARGBShadeRow = ARGBShadeRow_NEON;
+ }
+#endif
+#if defined(HAS_ARGBSHADEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) {
+ ARGBShadeRow = ARGBShadeRow_MSA;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBShadeRow(src_argb, dst_argb, width, value);
+ src_argb += src_stride_argb;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Interpolate 2 planes by specified amount (0 to 255).
+LIBYUV_API
+int InterpolatePlane(const uint8_t* src0,
+ int src_stride0,
+ const uint8_t* src1,
+ int src_stride1,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height,
+ int interpolation) {
+ int y;
+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst = dst + (height - 1) * dst_stride;
+ dst_stride = -dst_stride;
+ }
+ // Coalesce rows.
+ if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
+ width *= height;
+ height = 1;
+ src_stride0 = src_stride1 = dst_stride = 0;
+ }
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ InterpolateRow = InterpolateRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ InterpolateRow(dst, src0, src1 - src0, width, interpolation);
+ src0 += src_stride0;
+ src1 += src_stride1;
+ dst += dst_stride;
+ }
+ return 0;
+}
+
+// Interpolate 2 ARGB images by specified amount (0 to 255).
+LIBYUV_API
+int ARGBInterpolate(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height,
+ int interpolation) {
+ return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
+ src_stride_argb1, dst_argb, dst_stride_argb,
+ width * 4, height, interpolation);
+}
+
+// Interpolate 2 YUV images by specified amount (0 to 255).
+LIBYUV_API
+int I420Interpolate(const uint8_t* src0_y,
+ int src0_stride_y,
+ const uint8_t* src0_u,
+ int src0_stride_u,
+ const uint8_t* src0_v,
+ int src0_stride_v,
+ const uint8_t* src1_y,
+ int src1_stride_y,
+ const uint8_t* src1_u,
+ int src1_stride_u,
+ const uint8_t* src1_v,
+ int src1_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int interpolation) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
+ !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
+ dst_stride_y, width, height, interpolation);
+ InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
+ dst_stride_u, halfwidth, halfheight, interpolation);
+ InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
+ dst_stride_v, halfwidth, halfheight, interpolation);
+ return 0;
+}
+
+// Shuffle ARGB channel order. e.g. BGRA to ARGB.
+LIBYUV_API
+int ARGBShuffle(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const uint8_t* shuffler,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb,
+ const uint8_t* shuffler, int width) = ARGBShuffleRow_C;
+ if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_bgra = src_bgra + (height - 1) * src_stride_bgra;
+ src_stride_bgra = -src_stride_bgra;
+ }
+ // Coalesce rows.
+ if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_bgra = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBShuffleRow = ARGBShuffleRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBSHUFFLEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBShuffleRow = ARGBShuffleRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBSHUFFLEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBShuffleRow = ARGBShuffleRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBSHUFFLEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBShuffleRow = ARGBShuffleRow_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBShuffleRow = ARGBShuffleRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
+ src_bgra += src_stride_bgra;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Sobel ARGB effect.
+static int ARGBSobelize(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height,
+ void (*SobelRow)(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst,
+ int width)) {
+ int y;
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
+ ARGBToYJRow_C;
+ void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
+ uint8_t* dst_sobely, int width) = SobelYRow_C;
+ void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
+ const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
+ SobelXRow_C;
+ const int kEdge = 16; // Extra pixels at start of row for extrude/align.
+ if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+
+#if defined(HAS_ARGBTOYJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYJRow = ARGBToYJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYJRow = ARGBToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_MSA;
+ }
+ }
+#endif
+
+#if defined(HAS_SOBELYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SobelYRow = SobelYRow_SSE2;
+ }
+#endif
+#if defined(HAS_SOBELYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SobelYRow = SobelYRow_NEON;
+ }
+#endif
+#if defined(HAS_SOBELYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ SobelYRow = SobelYRow_MSA;
+ }
+#endif
+#if defined(HAS_SOBELXROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SobelXRow = SobelXRow_SSE2;
+ }
+#endif
+#if defined(HAS_SOBELXROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SobelXRow = SobelXRow_NEON;
+ }
+#endif
+#if defined(HAS_SOBELXROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ SobelXRow = SobelXRow_MSA;
+ }
+#endif
+ {
+ // 3 rows with edges before/after.
+ const int kRowSize = (width + kEdge + 31) & ~31;
+ align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
+ uint8_t* row_sobelx = rows;
+ uint8_t* row_sobely = rows + kRowSize;
+ uint8_t* row_y = rows + kRowSize * 2;
+
+ // Convert first row.
+ uint8_t* row_y0 = row_y + kEdge;
+ uint8_t* row_y1 = row_y0 + kRowSize;
+ uint8_t* row_y2 = row_y1 + kRowSize;
+ ARGBToYJRow(src_argb, row_y0, width);
+ row_y0[-1] = row_y0[0];
+ memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
+ ARGBToYJRow(src_argb, row_y1, width);
+ row_y1[-1] = row_y1[0];
+ memset(row_y1 + width, row_y1[width - 1], 16);
+ memset(row_y2 + width, 0, 16);
+
+ for (y = 0; y < height; ++y) {
+ // Convert next row of ARGB to G.
+ if (y < (height - 1)) {
+ src_argb += src_stride_argb;
+ }
+ ARGBToYJRow(src_argb, row_y2, width);
+ row_y2[-1] = row_y2[0];
+ row_y2[width] = row_y2[width - 1];
+
+ SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
+ SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
+ SobelRow(row_sobelx, row_sobely, dst_argb, width);
+
+ // Cycle thru circular queue of 3 row_y buffers.
+ {
+ uint8_t* row_yt = row_y0;
+ row_y0 = row_y1;
+ row_y1 = row_y2;
+ row_y2 = row_yt;
+ }
+
+ dst_argb += dst_stride_argb;
+ }
+ free_aligned_buffer_64(rows);
+ }
+ return 0;
+}
+
+// Sobel ARGB effect.
+LIBYUV_API
+int ARGBSobel(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
+ uint8_t* dst_argb, int width) = SobelRow_C;
+#if defined(HAS_SOBELROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SobelRow = SobelRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SobelRow = SobelRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SOBELROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SobelRow = SobelRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ SobelRow = SobelRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SOBELROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ SobelRow = SobelRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ SobelRow = SobelRow_MSA;
+ }
+ }
+#endif
+ return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
+ width, height, SobelRow);
+}
+
+// Sobel ARGB effect with planar output.
+LIBYUV_API
+int ARGBSobelToPlane(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height) {
+ void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
+ uint8_t* dst_, int width) = SobelToPlaneRow_C;
+#if defined(HAS_SOBELTOPLANEROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SobelToPlaneRow = SobelToPlaneRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SOBELTOPLANEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SobelToPlaneRow = SobelToPlaneRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SOBELTOPLANEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ SobelToPlaneRow = SobelToPlaneRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ SobelToPlaneRow = SobelToPlaneRow_MSA;
+ }
+ }
+#endif
+ return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
+ height, SobelToPlaneRow);
+}
+
+// SobelXY ARGB effect.
+// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
+LIBYUV_API
+int ARGBSobelXY(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
+ uint8_t* dst_argb, int width) = SobelXYRow_C;
+#if defined(HAS_SOBELXYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SobelXYRow = SobelXYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SobelXYRow = SobelXYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SOBELXYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SobelXYRow = SobelXYRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ SobelXYRow = SobelXYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SOBELXYROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ SobelXYRow = SobelXYRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ SobelXYRow = SobelXYRow_MSA;
+ }
+ }
+#endif
+ return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
+ width, height, SobelXYRow);
+}
+
+// Apply a 4x4 polynomial to each ARGB pixel.
+LIBYUV_API
+int ARGBPolynomial(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const float* poly,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+ const float* poly, int width) = ARGBPolynomialRow_C;
+ if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
+ ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
+ }
+#endif
+#if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
+ IS_ALIGNED(width, 2)) {
+ ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBPolynomialRow(src_argb, dst_argb, poly, width);
+ src_argb += src_stride_argb;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert plane of 16 bit shorts to half floats.
+// Source values are multiplied by scale before storing as half float.
+LIBYUV_API
+int HalfFloatPlane(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ float scale,
+ int width,
+ int height) {
+ int y;
+ void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale,
+ int width) = HalfFloatRow_C;
+ if (!src_y || !dst_y || width <= 0 || height == 0) {
+ return -1;
+ }
+ src_stride_y >>= 1;
+ dst_stride_y >>= 1;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_stride_y = -src_stride_y;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && dst_stride_y == width) {
+ width *= height;
+ height = 1;
+ src_stride_y = dst_stride_y = 0;
+ }
+#if defined(HAS_HALFFLOATROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ HalfFloatRow = HalfFloatRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ HalfFloatRow = HalfFloatRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_HALFFLOATROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ HalfFloatRow = HalfFloatRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ HalfFloatRow = HalfFloatRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_HALFFLOATROW_F16C)
+ if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
+ HalfFloatRow =
+ (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C;
+ if (IS_ALIGNED(width, 16)) {
+ HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C;
+ }
+ }
+#endif
+#if defined(HAS_HALFFLOATROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ HalfFloatRow =
+ (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_HALFFLOATROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ HalfFloatRow = HalfFloatRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ HalfFloatRow = HalfFloatRow_MSA;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ HalfFloatRow(src_y, dst_y, scale, width);
+ src_y += src_stride_y;
+ dst_y += dst_stride_y;
+ }
+ return 0;
+}
+
+// Convert a buffer of bytes to floats, scale the values and store as floats.
+LIBYUV_API
+int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width) {
+ void (*ByteToFloatRow)(const uint8_t* src, float* dst, float scale,
+ int width) = ByteToFloatRow_C;
+ if (!src_y || !dst_y || width <= 0) {
+ return -1;
+ }
+#if defined(HAS_BYTETOFLOATROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ByteToFloatRow = ByteToFloatRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ByteToFloatRow = ByteToFloatRow_NEON;
+ }
+ }
+#endif
+
+ ByteToFloatRow(src_y, dst_y, scale, width);
+ return 0;
+}
+
+// Apply a lumacolortable to each ARGB pixel.
+LIBYUV_API
+int ARGBLumaColorTable(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ const uint8_t* luma,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBLumaColorTableRow)(
+ const uint8_t* src_argb, uint8_t* dst_argb, int width,
+ const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
+ if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
+ ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
+ src_argb += src_stride_argb;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Copy Alpha from one ARGB image to another.
+LIBYUV_API
+int ARGBCopyAlpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+ int width) = ARGBCopyAlphaRow_C;
+ if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBCopyAlphaRow(src_argb, dst_argb, width);
+ src_argb += src_stride_argb;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Extract just the alpha channel from ARGB.
+LIBYUV_API
+int ARGBExtractAlpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height) {
+ if (!src_argb || !dst_a || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb += (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_a == width) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_a = 0;
+ }
+ void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
+ int width) = ARGBExtractAlphaRow_C;
+#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
+ : ARGBExtractAlphaRow_Any_SSE2;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
+ : ARGBExtractAlphaRow_Any_AVX2;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
+ : ARGBExtractAlphaRow_Any_NEON;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA
+ : ARGBExtractAlphaRow_Any_MSA;
+ }
+#endif
+
+ for (int y = 0; y < height; ++y) {
+ ARGBExtractAlphaRow(src_argb, dst_a, width);
+ src_argb += src_stride_argb;
+ dst_a += dst_stride_a;
+ }
+ return 0;
+}
+
+// Copy a planar Y channel to the alpha channel of a destination ARGB image.
+LIBYUV_API
+int ARGBCopyYToAlpha(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb,
+ int width) = ARGBCopyYToAlphaRow_C;
+ if (!src_y || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_stride_y = -src_stride_y;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_y = dst_stride_argb = 0;
+ }
+#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
+ }
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBCopyYToAlphaRow(src_y, dst_argb, width);
+ src_y += src_stride_y;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// TODO(fbarchard): Consider if width is even Y channel can be split
+// directly. A SplitUVRow_Odd function could copy the remaining chroma.
+
+LIBYUV_API
+int YUY2ToNV12(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ int y;
+ int halfwidth = (width + 1) >> 1;
+ void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
+ int width) = SplitUVRow_C;
+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+ src_stride_yuy2 = -src_stride_yuy2;
+ }
+#if defined(HAS_SPLITUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SplitUVRow = SplitUVRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ SplitUVRow = SplitUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SplitUVRow = SplitUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ SplitUVRow = SplitUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ InterpolateRow = InterpolateRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_MSA;
+ }
+ }
+#endif
+
+ {
+ int awidth = halfwidth * 2;
+ // row of y and 2 rows of uv
+ align_buffer_64(rows, awidth * 3);
+
+ for (y = 0; y < height - 1; y += 2) {
+ // Split Y from UV.
+ SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
+ memcpy(dst_y, rows, width);
+ SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
+ memcpy(dst_y + dst_stride_y, rows, width);
+ InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
+ src_yuy2 += src_stride_yuy2 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ // Split Y from UV.
+ SplitUVRow(src_yuy2, rows, dst_uv, awidth);
+ memcpy(dst_y, rows, width);
+ }
+ free_aligned_buffer_64(rows);
+ }
+ return 0;
+}
+
+LIBYUV_API
+int UYVYToNV12(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ int y;
+ int halfwidth = (width + 1) >> 1;
+ void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
+ int width) = SplitUVRow_C;
+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
+ src_stride_uyvy = -src_stride_uyvy;
+ }
+#if defined(HAS_SPLITUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ SplitUVRow = SplitUVRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ SplitUVRow = SplitUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ SplitUVRow = SplitUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ SplitUVRow = SplitUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SPLITUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ SplitUVRow = SplitUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ SplitUVRow = SplitUVRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ InterpolateRow = InterpolateRow_Any_MSA;
+ if (IS_ALIGNED(width, 32)) {
+ InterpolateRow = InterpolateRow_MSA;
+ }
+ }
+#endif
+
+ {
+ int awidth = halfwidth * 2;
+ // row of y and 2 rows of uv
+ align_buffer_64(rows, awidth * 3);
+
+ for (y = 0; y < height - 1; y += 2) {
+ // Split Y from UV.
+ SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
+ memcpy(dst_y, rows, width);
+ SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
+ memcpy(dst_y + dst_stride_y, rows, width);
+ InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
+ src_uyvy += src_stride_uyvy * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ // Split Y from UV.
+ SplitUVRow(src_uyvy, dst_uv, rows, awidth);
+ memcpy(dst_y, rows, width);
+ }
+ free_aligned_buffer_64(rows);
+ }
+ return 0;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate.cc
new file mode 100644
index 0000000000..f2bed85b75
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate.cc
@@ -0,0 +1,514 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate.h"
+
+#include "libyuv/convert.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate_row.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+LIBYUV_API
+void TransposePlane(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ int i = height;
+#if defined(HAS_TRANSPOSEWX16_MSA)
+ void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst,
+ int dst_stride, int width) = TransposeWx16_C;
+#else
+ void (*TransposeWx8)(const uint8_t* src, int src_stride, uint8_t* dst,
+ int dst_stride, int width) = TransposeWx8_C;
+#endif
+#if defined(HAS_TRANSPOSEWX8_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ TransposeWx8 = TransposeWx8_NEON;
+ }
+#endif
+#if defined(HAS_TRANSPOSEWX8_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ TransposeWx8 = TransposeWx8_Any_SSSE3;
+ if (IS_ALIGNED(width, 8)) {
+ TransposeWx8 = TransposeWx8_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ TransposeWx8 = TransposeWx8_Fast_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_TRANSPOSEWX16_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ TransposeWx16 = TransposeWx16_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ TransposeWx16 = TransposeWx16_MSA;
+ }
+ }
+#endif
+
+#if defined(HAS_TRANSPOSEWX16_MSA)
+ // Work across the source in 16x16 tiles
+ while (i >= 16) {
+ TransposeWx16(src, src_stride, dst, dst_stride, width);
+ src += 16 * src_stride; // Go down 16 rows.
+ dst += 16; // Move over 16 columns.
+ i -= 16;
+ }
+#else
+ // Work across the source in 8x8 tiles
+ while (i >= 8) {
+ TransposeWx8(src, src_stride, dst, dst_stride, width);
+ src += 8 * src_stride; // Go down 8 rows.
+ dst += 8; // Move over 8 columns.
+ i -= 8;
+ }
+#endif
+
+ if (i > 0) {
+ TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
+ }
+}
+
+LIBYUV_API
+void RotatePlane90(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ // Rotate by 90 is a transpose with the source read
+ // from bottom to top. So set the source pointer to the end
+ // of the buffer and flip the sign of the source stride.
+ src += src_stride * (height - 1);
+ src_stride = -src_stride;
+ TransposePlane(src, src_stride, dst, dst_stride, width, height);
+}
+
+LIBYUV_API
+void RotatePlane270(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ // Rotate by 270 is a transpose with the destination written
+ // from bottom to top. So set the destination pointer to the end
+ // of the buffer and flip the sign of the destination stride.
+ dst += dst_stride * (width - 1);
+ dst_stride = -dst_stride;
+ TransposePlane(src, src_stride, dst, dst_stride, width, height);
+}
+
+LIBYUV_API
+void RotatePlane180(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ // Swap first and last row and mirror the content. Uses a temporary row.
+ align_buffer_64(row, width);
+ const uint8_t* src_bot = src + src_stride * (height - 1);
+ uint8_t* dst_bot = dst + dst_stride * (height - 1);
+ int half_height = (height + 1) >> 1;
+ int y;
+ void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
+ void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
+#if defined(HAS_MIRRORROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ MirrorRow = MirrorRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_MIRRORROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ MirrorRow = MirrorRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ MirrorRow = MirrorRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_MIRRORROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ MirrorRow = MirrorRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_MIRRORROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ MirrorRow = MirrorRow_Any_MSA;
+ if (IS_ALIGNED(width, 64)) {
+ MirrorRow = MirrorRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_COPYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
+ }
+#endif
+#if defined(HAS_COPYROW_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
+ }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+ if (TestCpuFlag(kCpuHasERMS)) {
+ CopyRow = CopyRow_ERMS;
+ }
+#endif
+#if defined(HAS_COPYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
+ }
+#endif
+
+ // Odd height will harmlessly mirror the middle row twice.
+ for (y = 0; y < half_height; ++y) {
+ MirrorRow(src, row, width); // Mirror first row into a buffer
+ src += src_stride;
+ MirrorRow(src_bot, dst, width); // Mirror last row into first row
+ dst += dst_stride;
+ CopyRow(row, dst_bot, width); // Copy first mirrored row into last
+ src_bot -= src_stride;
+ dst_bot -= dst_stride;
+ }
+ free_aligned_buffer_64(row);
+}
+
+LIBYUV_API
+void TransposeUV(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
+ int i = height;
+#if defined(HAS_TRANSPOSEUVWX16_MSA)
+ void (*TransposeUVWx16)(const uint8_t* src, int src_stride, uint8_t* dst_a,
+ int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
+ int width) = TransposeUVWx16_C;
+#else
+ void (*TransposeUVWx8)(const uint8_t* src, int src_stride, uint8_t* dst_a,
+ int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
+ int width) = TransposeUVWx8_C;
+#endif
+#if defined(HAS_TRANSPOSEUVWX8_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ TransposeUVWx8 = TransposeUVWx8_NEON;
+ }
+#endif
+#if defined(HAS_TRANSPOSEUVWX8_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ TransposeUVWx8 = TransposeUVWx8_Any_SSE2;
+ if (IS_ALIGNED(width, 8)) {
+ TransposeUVWx8 = TransposeUVWx8_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_TRANSPOSEUVWX16_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ TransposeUVWx16 = TransposeUVWx16_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ TransposeUVWx16 = TransposeUVWx16_MSA;
+ }
+ }
+#endif
+
+#if defined(HAS_TRANSPOSEUVWX16_MSA)
+ // Work through the source in 8x8 tiles.
+ while (i >= 16) {
+ TransposeUVWx16(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
+ width);
+ src += 16 * src_stride; // Go down 16 rows.
+ dst_a += 16; // Move over 8 columns.
+ dst_b += 16; // Move over 8 columns.
+ i -= 16;
+ }
+#else
+ // Work through the source in 8x8 tiles.
+ while (i >= 8) {
+ TransposeUVWx8(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
+ width);
+ src += 8 * src_stride; // Go down 8 rows.
+ dst_a += 8; // Move over 8 columns.
+ dst_b += 8; // Move over 8 columns.
+ i -= 8;
+ }
+#endif
+
+ if (i > 0) {
+ TransposeUVWxH_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
+ width, i);
+ }
+}
+
+LIBYUV_API
+void RotateUV90(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
+ src += src_stride * (height - 1);
+ src_stride = -src_stride;
+
+ TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width,
+ height);
+}
+
+LIBYUV_API
+void RotateUV270(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
+ dst_a += dst_stride_a * (width - 1);
+ dst_b += dst_stride_b * (width - 1);
+ dst_stride_a = -dst_stride_a;
+ dst_stride_b = -dst_stride_b;
+
+ TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width,
+ height);
+}
+
+// Rotate 180 is a horizontal and vertical flip.
+LIBYUV_API
+void RotateUV180(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
+ int i;
+ void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v,
+ int width) = MirrorUVRow_C;
+#if defined(HAS_MIRRORUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+ MirrorUVRow = MirrorUVRow_NEON;
+ }
+#endif
+#if defined(HAS_MIRRORUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
+ MirrorUVRow = MirrorUVRow_SSSE3;
+ }
+#endif
+#if defined(HAS_MIRRORUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 32)) {
+ MirrorUVRow = MirrorUVRow_MSA;
+ }
+#endif
+
+ dst_a += dst_stride_a * (height - 1);
+ dst_b += dst_stride_b * (height - 1);
+
+ for (i = 0; i < height; ++i) {
+ MirrorUVRow(src, dst_a, dst_b, width);
+ src += src_stride;
+ dst_a -= dst_stride_a;
+ dst_b -= dst_stride_b;
+ }
+}
+
+LIBYUV_API
+int RotatePlane(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ if (!src || width <= 0 || height == 0 || !dst) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src = src + (height - 1) * src_stride;
+ src_stride = -src_stride;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // copy frame
+ CopyPlane(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ case kRotate90:
+ RotatePlane90(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ case kRotate270:
+ RotatePlane270(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ case kRotate180:
+ RotatePlane180(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+LIBYUV_API
+int I420Rotate(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
+ !dst_u || !dst_v) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (halfheight - 1) * src_stride_u;
+ src_v = src_v + (halfheight - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // copy frame
+ return I420Copy(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height);
+ case kRotate90:
+ RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ halfheight);
+ RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ halfheight);
+ return 0;
+ case kRotate270:
+ RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ halfheight);
+ RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ halfheight);
+ return 0;
+ case kRotate180:
+ RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ halfheight);
+ RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ halfheight);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+LIBYUV_API
+int NV12ToI420Rotate(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_y || !src_uv || width <= 0 || height == 0 || !dst_y || !dst_u ||
+ !dst_v) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ halfheight = (height + 1) >> 1;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_uv = src_uv + (halfheight - 1) * src_stride_uv;
+ src_stride_y = -src_stride_y;
+ src_stride_uv = -src_stride_uv;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // copy frame
+ return NV12ToI420(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
+ dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
+ width, height);
+ case kRotate90:
+ RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotateUV90(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, halfwidth, halfheight);
+ return 0;
+ case kRotate270:
+ RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotateUV270(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, halfwidth, halfheight);
+ return 0;
+ case kRotate180:
+ RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotateUV180(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, halfwidth, halfheight);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_any.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_any.cc
new file mode 100644
index 0000000000..c2752e6222
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_any.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2015 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate.h"
+#include "libyuv/rotate_row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#define TANY(NAMEANY, TPOS_SIMD, MASK) \
+ void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, \
+ int dst_stride, int width) { \
+ int r = width & MASK; \
+ int n = width - r; \
+ if (n > 0) { \
+ TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
+ } \
+ TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r); \
+ }
+
+#ifdef HAS_TRANSPOSEWX8_NEON
+TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
+#endif
+#ifdef HAS_TRANSPOSEWX8_SSSE3
+TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
+#endif
+#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
+TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
+#endif
+#ifdef HAS_TRANSPOSEWX16_MSA
+TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15)
+#endif
+#undef TANY
+
+#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
+ void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \
+ int dst_stride_a, uint8_t* dst_b, int dst_stride_b, \
+ int width) { \
+ int r = width & MASK; \
+ int n = width - r; \
+ if (n > 0) { \
+ TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, n); \
+ } \
+ TransposeUVWx8_C(src + n * 2, src_stride, dst_a + n * dst_stride_a, \
+ dst_stride_a, dst_b + n * dst_stride_b, dst_stride_b, r); \
+ }
+
+#ifdef HAS_TRANSPOSEUVWX8_NEON
+TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
+#endif
+#ifdef HAS_TRANSPOSEUVWX8_SSE2
+TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
+#endif
+#ifdef HAS_TRANSPOSEUVWX16_MSA
+TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7)
+#endif
+#undef TUVANY
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_argb.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_argb.cc
new file mode 100644
index 0000000000..5a6e05376f
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_argb.cc
@@ -0,0 +1,224 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate.h"
+
+#include "libyuv/convert.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+static void ARGBTranspose(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int i;
+ int src_pixel_step = src_stride_argb >> 2;
+ void (*ScaleARGBRowDownEven)(
+ const uint8_t* src_argb, ptrdiff_t src_stride_argb, int src_step,
+ uint8_t* dst_argb, int dst_width) = ScaleARGBRowDownEven_C;
+#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_SSE2;
+ if (IS_ALIGNED(height, 4)) { // Width of dest.
+ ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_NEON;
+ if (IS_ALIGNED(height, 4)) { // Width of dest.
+ ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MSA;
+ if (IS_ALIGNED(height, 4)) { // Width of dest.
+ ScaleARGBRowDownEven = ScaleARGBRowDownEven_MSA;
+ }
+ }
+#endif
+
+ for (i = 0; i < width; ++i) { // column of source to row of dest.
+ ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
+ dst_argb += dst_stride_argb;
+ src_argb += 4;
+ }
+}
+
+void ARGBRotate90(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ // Rotate by 90 is a ARGBTranspose with the source read
+ // from bottom to top. So set the source pointer to the end
+ // of the buffer and flip the sign of the source stride.
+ src_argb += src_stride_argb * (height - 1);
+ src_stride_argb = -src_stride_argb;
+ ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
+ height);
+}
+
+void ARGBRotate270(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ // Rotate by 270 is a ARGBTranspose with the destination written
+ // from bottom to top. So set the destination pointer to the end
+ // of the buffer and flip the sign of the destination stride.
+ dst_argb += dst_stride_argb * (width - 1);
+ dst_stride_argb = -dst_stride_argb;
+ ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
+ height);
+}
+
+void ARGBRotate180(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ // Swap first and last row and mirror the content. Uses a temporary row.
+ align_buffer_64(row, width * 4);
+ const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1);
+ uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1);
+ int half_height = (height + 1) >> 1;
+ int y;
+ void (*ARGBMirrorRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
+ ARGBMirrorRow_C;
+ void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
+ CopyRow_C;
+#if defined(HAS_ARGBMIRRORROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMirrorRow = ARGBMirrorRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMIRRORROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMirrorRow = ARGBMirrorRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMIRRORROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBMIRRORROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBMirrorRow = ARGBMirrorRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_COPYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
+ }
+#endif
+#if defined(HAS_COPYROW_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
+ }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+ if (TestCpuFlag(kCpuHasERMS)) {
+ CopyRow = CopyRow_ERMS;
+ }
+#endif
+#if defined(HAS_COPYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
+ }
+#endif
+
+ // Odd height will harmlessly mirror the middle row twice.
+ for (y = 0; y < half_height; ++y) {
+ ARGBMirrorRow(src_argb, row, width); // Mirror first row into a buffer
+ ARGBMirrorRow(src_bot, dst_argb, width); // Mirror last row into first row
+ CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last
+ src_argb += src_stride_argb;
+ dst_argb += dst_stride_argb;
+ src_bot -= src_stride_argb;
+ dst_bot -= dst_stride_argb;
+ }
+ free_aligned_buffer_64(row);
+}
+
+LIBYUV_API
+int ARGBRotate(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // copy frame
+ return ARGBCopy(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
+ width, height);
+ case kRotate90:
+ ARGBRotate90(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
+ height);
+ return 0;
+ case kRotate270:
+ ARGBRotate270(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
+ height);
+ return 0;
+ case kRotate180:
+ ARGBRotate180(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
+ height);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_common.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_common.cc
new file mode 100644
index 0000000000..ff212adebc
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_common.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate_row.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+void TransposeWx8_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst[0] = src[0 * src_stride];
+ dst[1] = src[1 * src_stride];
+ dst[2] = src[2 * src_stride];
+ dst[3] = src[3 * src_stride];
+ dst[4] = src[4 * src_stride];
+ dst[5] = src[5 * src_stride];
+ dst[6] = src[6 * src_stride];
+ dst[7] = src[7 * src_stride];
+ ++src;
+ dst += dst_stride;
+ }
+}
+
+void TransposeUVWx8_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst_a[0] = src[0 * src_stride + 0];
+ dst_b[0] = src[0 * src_stride + 1];
+ dst_a[1] = src[1 * src_stride + 0];
+ dst_b[1] = src[1 * src_stride + 1];
+ dst_a[2] = src[2 * src_stride + 0];
+ dst_b[2] = src[2 * src_stride + 1];
+ dst_a[3] = src[3 * src_stride + 0];
+ dst_b[3] = src[3 * src_stride + 1];
+ dst_a[4] = src[4 * src_stride + 0];
+ dst_b[4] = src[4 * src_stride + 1];
+ dst_a[5] = src[5 * src_stride + 0];
+ dst_b[5] = src[5 * src_stride + 1];
+ dst_a[6] = src[6 * src_stride + 0];
+ dst_b[6] = src[6 * src_stride + 1];
+ dst_a[7] = src[7 * src_stride + 0];
+ dst_b[7] = src[7 * src_stride + 1];
+ src += 2;
+ dst_a += dst_stride_a;
+ dst_b += dst_stride_b;
+ }
+}
+
+void TransposeWxH_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ int j;
+ for (j = 0; j < height; ++j) {
+ dst[i * dst_stride + j] = src[j * src_stride + i];
+ }
+ }
+}
+
+void TransposeUVWxH_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
+ int i;
+ for (i = 0; i < width * 2; i += 2) {
+ int j;
+ for (j = 0; j < height; ++j) {
+ dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
+ dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
+ }
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_gcc.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_gcc.cc
new file mode 100644
index 0000000000..04e19e29ee
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_gcc.cc
@@ -0,0 +1,374 @@
+/*
+ * Copyright 2015 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate_row.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC x86 and x64.
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
+
+// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
+#if defined(HAS_TRANSPOSEWX8_SSSE3)
+void TransposeWx8_SSSE3(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ asm volatile(
+ // Read in the data from the source pointer.
+ // First round of bit swap.
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "movq (%0,%3),%%xmm1 \n"
+ "lea (%0,%3,2),%0 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "movq (%0),%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "palignr $0x8,%%xmm1,%%xmm1 \n"
+ "movq (%0,%3),%%xmm3 \n"
+ "lea (%0,%3,2),%0 \n"
+ "punpcklbw %%xmm3,%%xmm2 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "movq (%0),%%xmm4 \n"
+ "palignr $0x8,%%xmm3,%%xmm3 \n"
+ "movq (%0,%3),%%xmm5 \n"
+ "lea (%0,%3,2),%0 \n"
+ "punpcklbw %%xmm5,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "movq (%0),%%xmm6 \n"
+ "palignr $0x8,%%xmm5,%%xmm5 \n"
+ "movq (%0,%3),%%xmm7 \n"
+ "lea (%0,%3,2),%0 \n"
+ "punpcklbw %%xmm7,%%xmm6 \n"
+ "neg %3 \n"
+ "movdqa %%xmm6,%%xmm7 \n"
+ "lea 0x8(%0,%3,8),%0 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ "neg %3 \n"
+ // Second round of bit swap.
+ "punpcklwd %%xmm2,%%xmm0 \n"
+ "punpcklwd %%xmm3,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm1,%%xmm3 \n"
+ "palignr $0x8,%%xmm2,%%xmm2 \n"
+ "palignr $0x8,%%xmm3,%%xmm3 \n"
+ "punpcklwd %%xmm6,%%xmm4 \n"
+ "punpcklwd %%xmm7,%%xmm5 \n"
+ "movdqa %%xmm4,%%xmm6 \n"
+ "movdqa %%xmm5,%%xmm7 \n"
+ "palignr $0x8,%%xmm6,%%xmm6 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ // Third round of bit swap.
+ // Write to the destination pointer.
+ "punpckldq %%xmm4,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "movdqa %%xmm0,%%xmm4 \n"
+ "palignr $0x8,%%xmm4,%%xmm4 \n"
+ "movq %%xmm4,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm6,%%xmm2 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "movq %%xmm2,(%1) \n"
+ "palignr $0x8,%%xmm6,%%xmm6 \n"
+ "punpckldq %%xmm5,%%xmm1 \n"
+ "movq %%xmm6,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "movdqa %%xmm1,%%xmm5 \n"
+ "movq %%xmm1,(%1) \n"
+ "palignr $0x8,%%xmm5,%%xmm5 \n"
+ "movq %%xmm5,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm7,%%xmm3 \n"
+ "movq %%xmm3,(%1) \n"
+ "movdqa %%xmm3,%%xmm7 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ "sub $0x8,%2 \n"
+ "movq %%xmm7,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "r"((intptr_t)(dst_stride)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // defined(HAS_TRANSPOSEWX8_SSSE3)
+
+// Transpose 16x8. 64 bit
+#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
+void TransposeWx8_Fast_SSSE3(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ asm volatile(
+ // Read in the data from the source pointer.
+ // First round of bit swap.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%0,%3),%%xmm1 \n"
+ "lea (%0,%3,2),%0 \n"
+ "movdqa %%xmm0,%%xmm8 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm8 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm8,%%xmm9 \n"
+ "palignr $0x8,%%xmm1,%%xmm1 \n"
+ "palignr $0x8,%%xmm9,%%xmm9 \n"
+ "movdqu (%0,%3),%%xmm3 \n"
+ "lea (%0,%3,2),%0 \n"
+ "movdqa %%xmm2,%%xmm10 \n"
+ "punpcklbw %%xmm3,%%xmm2 \n"
+ "punpckhbw %%xmm3,%%xmm10 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "movdqa %%xmm10,%%xmm11 \n"
+ "movdqu (%0),%%xmm4 \n"
+ "palignr $0x8,%%xmm3,%%xmm3 \n"
+ "palignr $0x8,%%xmm11,%%xmm11 \n"
+ "movdqu (%0,%3),%%xmm5 \n"
+ "lea (%0,%3,2),%0 \n"
+ "movdqa %%xmm4,%%xmm12 \n"
+ "punpcklbw %%xmm5,%%xmm4 \n"
+ "punpckhbw %%xmm5,%%xmm12 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "movdqa %%xmm12,%%xmm13 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "palignr $0x8,%%xmm5,%%xmm5 \n"
+ "palignr $0x8,%%xmm13,%%xmm13 \n"
+ "movdqu (%0,%3),%%xmm7 \n"
+ "lea (%0,%3,2),%0 \n"
+ "movdqa %%xmm6,%%xmm14 \n"
+ "punpcklbw %%xmm7,%%xmm6 \n"
+ "punpckhbw %%xmm7,%%xmm14 \n"
+ "neg %3 \n"
+ "movdqa %%xmm6,%%xmm7 \n"
+ "movdqa %%xmm14,%%xmm15 \n"
+ "lea 0x10(%0,%3,8),%0 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ "palignr $0x8,%%xmm15,%%xmm15 \n"
+ "neg %3 \n"
+ // Second round of bit swap.
+ "punpcklwd %%xmm2,%%xmm0 \n"
+ "punpcklwd %%xmm3,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm1,%%xmm3 \n"
+ "palignr $0x8,%%xmm2,%%xmm2 \n"
+ "palignr $0x8,%%xmm3,%%xmm3 \n"
+ "punpcklwd %%xmm6,%%xmm4 \n"
+ "punpcklwd %%xmm7,%%xmm5 \n"
+ "movdqa %%xmm4,%%xmm6 \n"
+ "movdqa %%xmm5,%%xmm7 \n"
+ "palignr $0x8,%%xmm6,%%xmm6 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ "punpcklwd %%xmm10,%%xmm8 \n"
+ "punpcklwd %%xmm11,%%xmm9 \n"
+ "movdqa %%xmm8,%%xmm10 \n"
+ "movdqa %%xmm9,%%xmm11 \n"
+ "palignr $0x8,%%xmm10,%%xmm10 \n"
+ "palignr $0x8,%%xmm11,%%xmm11 \n"
+ "punpcklwd %%xmm14,%%xmm12 \n"
+ "punpcklwd %%xmm15,%%xmm13 \n"
+ "movdqa %%xmm12,%%xmm14 \n"
+ "movdqa %%xmm13,%%xmm15 \n"
+ "palignr $0x8,%%xmm14,%%xmm14 \n"
+ "palignr $0x8,%%xmm15,%%xmm15 \n"
+ // Third round of bit swap.
+ // Write to the destination pointer.
+ "punpckldq %%xmm4,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "movdqa %%xmm0,%%xmm4 \n"
+ "palignr $0x8,%%xmm4,%%xmm4 \n"
+ "movq %%xmm4,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm6,%%xmm2 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "movq %%xmm2,(%1) \n"
+ "palignr $0x8,%%xmm6,%%xmm6 \n"
+ "punpckldq %%xmm5,%%xmm1 \n"
+ "movq %%xmm6,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "movdqa %%xmm1,%%xmm5 \n"
+ "movq %%xmm1,(%1) \n"
+ "palignr $0x8,%%xmm5,%%xmm5 \n"
+ "movq %%xmm5,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm7,%%xmm3 \n"
+ "movq %%xmm3,(%1) \n"
+ "movdqa %%xmm3,%%xmm7 \n"
+ "palignr $0x8,%%xmm7,%%xmm7 \n"
+ "movq %%xmm7,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm12,%%xmm8 \n"
+ "movq %%xmm8,(%1) \n"
+ "movdqa %%xmm8,%%xmm12 \n"
+ "palignr $0x8,%%xmm12,%%xmm12 \n"
+ "movq %%xmm12,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm14,%%xmm10 \n"
+ "movdqa %%xmm10,%%xmm14 \n"
+ "movq %%xmm10,(%1) \n"
+ "palignr $0x8,%%xmm14,%%xmm14 \n"
+ "punpckldq %%xmm13,%%xmm9 \n"
+ "movq %%xmm14,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "movdqa %%xmm9,%%xmm13 \n"
+ "movq %%xmm9,(%1) \n"
+ "palignr $0x8,%%xmm13,%%xmm13 \n"
+ "movq %%xmm13,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "punpckldq %%xmm15,%%xmm11 \n"
+ "movq %%xmm11,(%1) \n"
+ "movdqa %%xmm11,%%xmm15 \n"
+ "palignr $0x8,%%xmm15,%%xmm15 \n"
+ "sub $0x10,%2 \n"
+ "movq %%xmm15,(%1,%4) \n"
+ "lea (%1,%4,2),%1 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "r"((intptr_t)(dst_stride)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
+ "xmm15");
+}
+#endif // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
+
+// Transpose UV 8x8. 64 bit.
+#if defined(HAS_TRANSPOSEUVWX8_SSE2)
+void TransposeUVWx8_SSE2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width) {
+ asm volatile(
+ // Read in the data from the source pointer.
+ // First round of bit swap.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%0,%4),%%xmm1 \n"
+ "lea (%0,%4,2),%0 \n"
+ "movdqa %%xmm0,%%xmm8 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm8 \n"
+ "movdqa %%xmm8,%%xmm1 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "movdqu (%0,%4),%%xmm3 \n"
+ "lea (%0,%4,2),%0 \n"
+ "movdqa %%xmm2,%%xmm8 \n"
+ "punpcklbw %%xmm3,%%xmm2 \n"
+ "punpckhbw %%xmm3,%%xmm8 \n"
+ "movdqa %%xmm8,%%xmm3 \n"
+ "movdqu (%0),%%xmm4 \n"
+ "movdqu (%0,%4),%%xmm5 \n"
+ "lea (%0,%4,2),%0 \n"
+ "movdqa %%xmm4,%%xmm8 \n"
+ "punpcklbw %%xmm5,%%xmm4 \n"
+ "punpckhbw %%xmm5,%%xmm8 \n"
+ "movdqa %%xmm8,%%xmm5 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu (%0,%4),%%xmm7 \n"
+ "lea (%0,%4,2),%0 \n"
+ "movdqa %%xmm6,%%xmm8 \n"
+ "punpcklbw %%xmm7,%%xmm6 \n"
+ "neg %4 \n"
+ "lea 0x10(%0,%4,8),%0 \n"
+ "punpckhbw %%xmm7,%%xmm8 \n"
+ "movdqa %%xmm8,%%xmm7 \n"
+ "neg %4 \n"
+ // Second round of bit swap.
+ "movdqa %%xmm0,%%xmm8 \n"
+ "movdqa %%xmm1,%%xmm9 \n"
+ "punpckhwd %%xmm2,%%xmm8 \n"
+ "punpckhwd %%xmm3,%%xmm9 \n"
+ "punpcklwd %%xmm2,%%xmm0 \n"
+ "punpcklwd %%xmm3,%%xmm1 \n"
+ "movdqa %%xmm8,%%xmm2 \n"
+ "movdqa %%xmm9,%%xmm3 \n"
+ "movdqa %%xmm4,%%xmm8 \n"
+ "movdqa %%xmm5,%%xmm9 \n"
+ "punpckhwd %%xmm6,%%xmm8 \n"
+ "punpckhwd %%xmm7,%%xmm9 \n"
+ "punpcklwd %%xmm6,%%xmm4 \n"
+ "punpcklwd %%xmm7,%%xmm5 \n"
+ "movdqa %%xmm8,%%xmm6 \n"
+ "movdqa %%xmm9,%%xmm7 \n"
+ // Third round of bit swap.
+ // Write to the destination pointer.
+ "movdqa %%xmm0,%%xmm8 \n"
+ "punpckldq %%xmm4,%%xmm0 \n"
+ "movlpd %%xmm0,(%1) \n" // Write back U channel
+ "movhpd %%xmm0,(%2) \n" // Write back V channel
+ "punpckhdq %%xmm4,%%xmm8 \n"
+ "movlpd %%xmm8,(%1,%5) \n"
+ "lea (%1,%5,2),%1 \n"
+ "movhpd %%xmm8,(%2,%6) \n"
+ "lea (%2,%6,2),%2 \n"
+ "movdqa %%xmm2,%%xmm8 \n"
+ "punpckldq %%xmm6,%%xmm2 \n"
+ "movlpd %%xmm2,(%1) \n"
+ "movhpd %%xmm2,(%2) \n"
+ "punpckhdq %%xmm6,%%xmm8 \n"
+ "movlpd %%xmm8,(%1,%5) \n"
+ "lea (%1,%5,2),%1 \n"
+ "movhpd %%xmm8,(%2,%6) \n"
+ "lea (%2,%6,2),%2 \n"
+ "movdqa %%xmm1,%%xmm8 \n"
+ "punpckldq %%xmm5,%%xmm1 \n"
+ "movlpd %%xmm1,(%1) \n"
+ "movhpd %%xmm1,(%2) \n"
+ "punpckhdq %%xmm5,%%xmm8 \n"
+ "movlpd %%xmm8,(%1,%5) \n"
+ "lea (%1,%5,2),%1 \n"
+ "movhpd %%xmm8,(%2,%6) \n"
+ "lea (%2,%6,2),%2 \n"
+ "movdqa %%xmm3,%%xmm8 \n"
+ "punpckldq %%xmm7,%%xmm3 \n"
+ "movlpd %%xmm3,(%1) \n"
+ "movhpd %%xmm3,(%2) \n"
+ "punpckhdq %%xmm7,%%xmm8 \n"
+ "sub $0x8,%3 \n"
+ "movlpd %%xmm8,(%1,%5) \n"
+ "lea (%1,%5,2),%1 \n"
+ "movhpd %%xmm8,(%2,%6) \n"
+ "lea (%2,%6,2),%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst_a), // %1
+ "+r"(dst_b), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(src_stride)), // %4
+ "r"((intptr_t)(dst_stride_a)), // %5
+ "r"((intptr_t)(dst_stride_b)) // %6
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7", "xmm8", "xmm9");
+}
+#endif // defined(HAS_TRANSPOSEUVWX8_SSE2)
+#endif // defined(__x86_64__) || defined(__i386__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_msa.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_msa.cc
new file mode 100644
index 0000000000..99bdca65b3
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_msa.cc
@@ -0,0 +1,250 @@
+/*
+ * Copyright 2016 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate_row.h"
+
+// This module is for GCC MSA
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#include "libyuv/macros_msa.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#define ILVRL_B(in0, in1, in2, in3, out0, out1, out2, out3) \
+ { \
+ out0 = (v16u8)__msa_ilvr_b((v16i8)in1, (v16i8)in0); \
+ out1 = (v16u8)__msa_ilvl_b((v16i8)in1, (v16i8)in0); \
+ out2 = (v16u8)__msa_ilvr_b((v16i8)in3, (v16i8)in2); \
+ out3 = (v16u8)__msa_ilvl_b((v16i8)in3, (v16i8)in2); \
+ }
+
+#define ILVRL_H(in0, in1, in2, in3, out0, out1, out2, out3) \
+ { \
+ out0 = (v16u8)__msa_ilvr_h((v8i16)in1, (v8i16)in0); \
+ out1 = (v16u8)__msa_ilvl_h((v8i16)in1, (v8i16)in0); \
+ out2 = (v16u8)__msa_ilvr_h((v8i16)in3, (v8i16)in2); \
+ out3 = (v16u8)__msa_ilvl_h((v8i16)in3, (v8i16)in2); \
+ }
+
+#define ILVRL_W(in0, in1, in2, in3, out0, out1, out2, out3) \
+ { \
+ out0 = (v16u8)__msa_ilvr_w((v4i32)in1, (v4i32)in0); \
+ out1 = (v16u8)__msa_ilvl_w((v4i32)in1, (v4i32)in0); \
+ out2 = (v16u8)__msa_ilvr_w((v4i32)in3, (v4i32)in2); \
+ out3 = (v16u8)__msa_ilvl_w((v4i32)in3, (v4i32)in2); \
+ }
+
+#define ILVRL_D(in0, in1, in2, in3, out0, out1, out2, out3) \
+ { \
+ out0 = (v16u8)__msa_ilvr_d((v2i64)in1, (v2i64)in0); \
+ out1 = (v16u8)__msa_ilvl_d((v2i64)in1, (v2i64)in0); \
+ out2 = (v16u8)__msa_ilvr_d((v2i64)in3, (v2i64)in2); \
+ out3 = (v16u8)__msa_ilvl_d((v2i64)in3, (v2i64)in2); \
+ }
+
+void TransposeWx16_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ TransposeWx8_C(src, src_stride, dst, dst_stride, width);
+ TransposeWx8_C((src + 8 * src_stride), src_stride, (dst + 8), dst_stride,
+ width);
+}
+
+void TransposeUVWx16_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width) {
+ TransposeUVWx8_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
+ width);
+ TransposeUVWx8_C((src + 8 * src_stride), src_stride, (dst_a + 8),
+ dst_stride_a, (dst_b + 8), dst_stride_b, width);
+}
+
+void TransposeWx16_MSA(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ int x;
+ const uint8_t* s;
+ v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3;
+ v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
+ v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;
+
+ for (x = 0; x < width; x += 16) {
+ s = src;
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
+ ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3);
+ ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7);
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
+ res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0);
+ res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0);
+ ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
+ ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
+ dst += dst_stride * 4;
+ res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1);
+ res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1);
+ ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
+ ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
+ dst += dst_stride * 4;
+ res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2);
+ res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2);
+ ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
+ ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
+ dst += dst_stride * 4;
+ res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3);
+ res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3);
+ ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
+ ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
+ src += 16;
+ dst += dst_stride * 4;
+ }
+}
+
+void TransposeUVWx16_MSA(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width) {
+ int x;
+ const uint8_t* s;
+ v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3;
+ v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
+ v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;
+
+ for (x = 0; x < width; x += 8) {
+ s = src;
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
+ ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3);
+ ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7);
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ s += src_stride;
+ ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
+ res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0);
+ res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0);
+ ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
+ ST_UB2(dst0, dst2, dst_a, dst_stride_a);
+ ST_UB2(dst1, dst3, dst_b, dst_stride_b);
+ dst_a += dst_stride_a * 2;
+ dst_b += dst_stride_b * 2;
+ res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1);
+ res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1);
+ ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
+ ST_UB2(dst0, dst2, dst_a, dst_stride_a);
+ ST_UB2(dst1, dst3, dst_b, dst_stride_b);
+ dst_a += dst_stride_a * 2;
+ dst_b += dst_stride_b * 2;
+ res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2);
+ res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2);
+ ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
+ ST_UB2(dst0, dst2, dst_a, dst_stride_a);
+ ST_UB2(dst1, dst3, dst_b, dst_stride_b);
+ dst_a += dst_stride_a * 2;
+ dst_b += dst_stride_b * 2;
+ res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3);
+ res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3);
+ ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
+ ST_UB2(dst0, dst2, dst_a, dst_stride_a);
+ ST_UB2(dst1, dst3, dst_b, dst_stride_b);
+ src += 16;
+ dst_a += dst_stride_a * 2;
+ dst_b += dst_stride_b * 2;
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon.cc
new file mode 100644
index 0000000000..fdc0dd476c
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon.cc
@@ -0,0 +1,416 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate_row.h"
+#include "libyuv/row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
+ !defined(__aarch64__)
+
+static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13,
+ 2, 6, 10, 14, 3, 7, 11, 15};
+
+void TransposeWx8_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ const uint8_t* src_temp;
+ asm volatile(
+ // loops are on blocks of 8. loop will stop when
+ // counter gets to or below 0. starting the counter
+ // at w-8 allow for this
+ "sub %5, #8 \n"
+
+ // handle 8x8 blocks. this should be the majority of the plane
+ "1: \n"
+ "mov %0, %1 \n"
+
+ "vld1.8 {d0}, [%0], %2 \n"
+ "vld1.8 {d1}, [%0], %2 \n"
+ "vld1.8 {d2}, [%0], %2 \n"
+ "vld1.8 {d3}, [%0], %2 \n"
+ "vld1.8 {d4}, [%0], %2 \n"
+ "vld1.8 {d5}, [%0], %2 \n"
+ "vld1.8 {d6}, [%0], %2 \n"
+ "vld1.8 {d7}, [%0] \n"
+
+ "vtrn.8 d1, d0 \n"
+ "vtrn.8 d3, d2 \n"
+ "vtrn.8 d5, d4 \n"
+ "vtrn.8 d7, d6 \n"
+
+ "vtrn.16 d1, d3 \n"
+ "vtrn.16 d0, d2 \n"
+ "vtrn.16 d5, d7 \n"
+ "vtrn.16 d4, d6 \n"
+
+ "vtrn.32 d1, d5 \n"
+ "vtrn.32 d0, d4 \n"
+ "vtrn.32 d3, d7 \n"
+ "vtrn.32 d2, d6 \n"
+
+ "vrev16.8 q0, q0 \n"
+ "vrev16.8 q1, q1 \n"
+ "vrev16.8 q2, q2 \n"
+ "vrev16.8 q3, q3 \n"
+
+ "mov %0, %3 \n"
+
+ "vst1.8 {d1}, [%0], %4 \n"
+ "vst1.8 {d0}, [%0], %4 \n"
+ "vst1.8 {d3}, [%0], %4 \n"
+ "vst1.8 {d2}, [%0], %4 \n"
+ "vst1.8 {d5}, [%0], %4 \n"
+ "vst1.8 {d4}, [%0], %4 \n"
+ "vst1.8 {d7}, [%0], %4 \n"
+ "vst1.8 {d6}, [%0] \n"
+
+ "add %1, #8 \n" // src += 8
+ "add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride
+ "subs %5, #8 \n" // w -= 8
+ "bge 1b \n"
+
+ // add 8 back to counter. if the result is 0 there are
+ // no residuals.
+ "adds %5, #8 \n"
+ "beq 4f \n"
+
+ // some residual, so between 1 and 7 lines left to transpose
+ "cmp %5, #2 \n"
+ "blt 3f \n"
+
+ "cmp %5, #4 \n"
+ "blt 2f \n"
+
+ // 4x8 block
+ "mov %0, %1 \n"
+ "vld1.32 {d0[0]}, [%0], %2 \n"
+ "vld1.32 {d0[1]}, [%0], %2 \n"
+ "vld1.32 {d1[0]}, [%0], %2 \n"
+ "vld1.32 {d1[1]}, [%0], %2 \n"
+ "vld1.32 {d2[0]}, [%0], %2 \n"
+ "vld1.32 {d2[1]}, [%0], %2 \n"
+ "vld1.32 {d3[0]}, [%0], %2 \n"
+ "vld1.32 {d3[1]}, [%0] \n"
+
+ "mov %0, %3 \n"
+
+ "vld1.8 {q3}, [%6] \n"
+
+ "vtbl.8 d4, {d0, d1}, d6 \n"
+ "vtbl.8 d5, {d0, d1}, d7 \n"
+ "vtbl.8 d0, {d2, d3}, d6 \n"
+ "vtbl.8 d1, {d2, d3}, d7 \n"
+
+ // TODO(frkoenig): Rework shuffle above to
+ // write out with 4 instead of 8 writes.
+ "vst1.32 {d4[0]}, [%0], %4 \n"
+ "vst1.32 {d4[1]}, [%0], %4 \n"
+ "vst1.32 {d5[0]}, [%0], %4 \n"
+ "vst1.32 {d5[1]}, [%0] \n"
+
+ "add %0, %3, #4 \n"
+ "vst1.32 {d0[0]}, [%0], %4 \n"
+ "vst1.32 {d0[1]}, [%0], %4 \n"
+ "vst1.32 {d1[0]}, [%0], %4 \n"
+ "vst1.32 {d1[1]}, [%0] \n"
+
+ "add %1, #4 \n" // src += 4
+ "add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
+ "subs %5, #4 \n" // w -= 4
+ "beq 4f \n"
+
+ // some residual, check to see if it includes a 2x8 block,
+ // or less
+ "cmp %5, #2 \n"
+ "blt 3f \n"
+
+ // 2x8 block
+ "2: \n"
+ "mov %0, %1 \n"
+ "vld1.16 {d0[0]}, [%0], %2 \n"
+ "vld1.16 {d1[0]}, [%0], %2 \n"
+ "vld1.16 {d0[1]}, [%0], %2 \n"
+ "vld1.16 {d1[1]}, [%0], %2 \n"
+ "vld1.16 {d0[2]}, [%0], %2 \n"
+ "vld1.16 {d1[2]}, [%0], %2 \n"
+ "vld1.16 {d0[3]}, [%0], %2 \n"
+ "vld1.16 {d1[3]}, [%0] \n"
+
+ "vtrn.8 d0, d1 \n"
+
+ "mov %0, %3 \n"
+
+ "vst1.64 {d0}, [%0], %4 \n"
+ "vst1.64 {d1}, [%0] \n"
+
+ "add %1, #2 \n" // src += 2
+ "add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
+ "subs %5, #2 \n" // w -= 2
+ "beq 4f \n"
+
+ // 1x8 block
+ "3: \n"
+ "vld1.8 {d0[0]}, [%1], %2 \n"
+ "vld1.8 {d0[1]}, [%1], %2 \n"
+ "vld1.8 {d0[2]}, [%1], %2 \n"
+ "vld1.8 {d0[3]}, [%1], %2 \n"
+ "vld1.8 {d0[4]}, [%1], %2 \n"
+ "vld1.8 {d0[5]}, [%1], %2 \n"
+ "vld1.8 {d0[6]}, [%1], %2 \n"
+ "vld1.8 {d0[7]}, [%1] \n"
+
+ "vst1.64 {d0}, [%3] \n"
+
+ "4: \n"
+
+ : "=&r"(src_temp), // %0
+ "+r"(src), // %1
+ "+r"(src_stride), // %2
+ "+r"(dst), // %3
+ "+r"(dst_stride), // %4
+ "+r"(width) // %5
+ : "r"(&kVTbl4x4Transpose) // %6
+ : "memory", "cc", "q0", "q1", "q2", "q3");
+}
+
+static const uvec8 kVTbl4x4TransposeDi = {0, 8, 1, 9, 2, 10, 3, 11,
+ 4, 12, 5, 13, 6, 14, 7, 15};
+
+void TransposeUVWx8_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width) {
+ const uint8_t* src_temp;
+ asm volatile(
+ // loops are on blocks of 8. loop will stop when
+ // counter gets to or below 0. starting the counter
+ // at w-8 allow for this
+ "sub %7, #8 \n"
+
+ // handle 8x8 blocks. this should be the majority of the plane
+ "1: \n"
+ "mov %0, %1 \n"
+
+ "vld2.8 {d0, d1}, [%0], %2 \n"
+ "vld2.8 {d2, d3}, [%0], %2 \n"
+ "vld2.8 {d4, d5}, [%0], %2 \n"
+ "vld2.8 {d6, d7}, [%0], %2 \n"
+ "vld2.8 {d16, d17}, [%0], %2 \n"
+ "vld2.8 {d18, d19}, [%0], %2 \n"
+ "vld2.8 {d20, d21}, [%0], %2 \n"
+ "vld2.8 {d22, d23}, [%0] \n"
+
+ "vtrn.8 q1, q0 \n"
+ "vtrn.8 q3, q2 \n"
+ "vtrn.8 q9, q8 \n"
+ "vtrn.8 q11, q10 \n"
+
+ "vtrn.16 q1, q3 \n"
+ "vtrn.16 q0, q2 \n"
+ "vtrn.16 q9, q11 \n"
+ "vtrn.16 q8, q10 \n"
+
+ "vtrn.32 q1, q9 \n"
+ "vtrn.32 q0, q8 \n"
+ "vtrn.32 q3, q11 \n"
+ "vtrn.32 q2, q10 \n"
+
+ "vrev16.8 q0, q0 \n"
+ "vrev16.8 q1, q1 \n"
+ "vrev16.8 q2, q2 \n"
+ "vrev16.8 q3, q3 \n"
+ "vrev16.8 q8, q8 \n"
+ "vrev16.8 q9, q9 \n"
+ "vrev16.8 q10, q10 \n"
+ "vrev16.8 q11, q11 \n"
+
+ "mov %0, %3 \n"
+
+ "vst1.8 {d2}, [%0], %4 \n"
+ "vst1.8 {d0}, [%0], %4 \n"
+ "vst1.8 {d6}, [%0], %4 \n"
+ "vst1.8 {d4}, [%0], %4 \n"
+ "vst1.8 {d18}, [%0], %4 \n"
+ "vst1.8 {d16}, [%0], %4 \n"
+ "vst1.8 {d22}, [%0], %4 \n"
+ "vst1.8 {d20}, [%0] \n"
+
+ "mov %0, %5 \n"
+
+ "vst1.8 {d3}, [%0], %6 \n"
+ "vst1.8 {d1}, [%0], %6 \n"
+ "vst1.8 {d7}, [%0], %6 \n"
+ "vst1.8 {d5}, [%0], %6 \n"
+ "vst1.8 {d19}, [%0], %6 \n"
+ "vst1.8 {d17}, [%0], %6 \n"
+ "vst1.8 {d23}, [%0], %6 \n"
+ "vst1.8 {d21}, [%0] \n"
+
+ "add %1, #8*2 \n" // src += 8*2
+ "add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a
+ "add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b
+ "subs %7, #8 \n" // w -= 8
+ "bge 1b \n"
+
+ // add 8 back to counter. if the result is 0 there are
+ // no residuals.
+ "adds %7, #8 \n"
+ "beq 4f \n"
+
+ // some residual, so between 1 and 7 lines left to transpose
+ "cmp %7, #2 \n"
+ "blt 3f \n"
+
+ "cmp %7, #4 \n"
+ "blt 2f \n"
+
+ // TODO(frkoenig): Clean this up
+ // 4x8 block
+ "mov %0, %1 \n"
+ "vld1.64 {d0}, [%0], %2 \n"
+ "vld1.64 {d1}, [%0], %2 \n"
+ "vld1.64 {d2}, [%0], %2 \n"
+ "vld1.64 {d3}, [%0], %2 \n"
+ "vld1.64 {d4}, [%0], %2 \n"
+ "vld1.64 {d5}, [%0], %2 \n"
+ "vld1.64 {d6}, [%0], %2 \n"
+ "vld1.64 {d7}, [%0] \n"
+
+ "vld1.8 {q15}, [%8] \n"
+
+ "vtrn.8 q0, q1 \n"
+ "vtrn.8 q2, q3 \n"
+
+ "vtbl.8 d16, {d0, d1}, d30 \n"
+ "vtbl.8 d17, {d0, d1}, d31 \n"
+ "vtbl.8 d18, {d2, d3}, d30 \n"
+ "vtbl.8 d19, {d2, d3}, d31 \n"
+ "vtbl.8 d20, {d4, d5}, d30 \n"
+ "vtbl.8 d21, {d4, d5}, d31 \n"
+ "vtbl.8 d22, {d6, d7}, d30 \n"
+ "vtbl.8 d23, {d6, d7}, d31 \n"
+
+ "mov %0, %3 \n"
+
+ "vst1.32 {d16[0]}, [%0], %4 \n"
+ "vst1.32 {d16[1]}, [%0], %4 \n"
+ "vst1.32 {d17[0]}, [%0], %4 \n"
+ "vst1.32 {d17[1]}, [%0], %4 \n"
+
+ "add %0, %3, #4 \n"
+ "vst1.32 {d20[0]}, [%0], %4 \n"
+ "vst1.32 {d20[1]}, [%0], %4 \n"
+ "vst1.32 {d21[0]}, [%0], %4 \n"
+ "vst1.32 {d21[1]}, [%0] \n"
+
+ "mov %0, %5 \n"
+
+ "vst1.32 {d18[0]}, [%0], %6 \n"
+ "vst1.32 {d18[1]}, [%0], %6 \n"
+ "vst1.32 {d19[0]}, [%0], %6 \n"
+ "vst1.32 {d19[1]}, [%0], %6 \n"
+
+ "add %0, %5, #4 \n"
+ "vst1.32 {d22[0]}, [%0], %6 \n"
+ "vst1.32 {d22[1]}, [%0], %6 \n"
+ "vst1.32 {d23[0]}, [%0], %6 \n"
+ "vst1.32 {d23[1]}, [%0] \n"
+
+ "add %1, #4*2 \n" // src += 4 * 2
+ "add %3, %3, %4, lsl #2 \n" // dst_a += 4 *
+ // dst_stride_a
+ "add %5, %5, %6, lsl #2 \n" // dst_b += 4 *
+ // dst_stride_b
+ "subs %7, #4 \n" // w -= 4
+ "beq 4f \n"
+
+ // some residual, check to see if it includes a 2x8 block,
+ // or less
+ "cmp %7, #2 \n"
+ "blt 3f \n"
+
+ // 2x8 block
+ "2: \n"
+ "mov %0, %1 \n"
+ "vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
+ "vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
+ "vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
+ "vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
+ "vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
+ "vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
+ "vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
+ "vld2.16 {d1[3], d3[3]}, [%0] \n"
+
+ "vtrn.8 d0, d1 \n"
+ "vtrn.8 d2, d3 \n"
+
+ "mov %0, %3 \n"
+
+ "vst1.64 {d0}, [%0], %4 \n"
+ "vst1.64 {d2}, [%0] \n"
+
+ "mov %0, %5 \n"
+
+ "vst1.64 {d1}, [%0], %6 \n"
+ "vst1.64 {d3}, [%0] \n"
+
+ "add %1, #2*2 \n" // src += 2 * 2
+ "add %3, %3, %4, lsl #1 \n" // dst_a += 2 *
+ // dst_stride_a
+ "add %5, %5, %6, lsl #1 \n" // dst_b += 2 *
+ // dst_stride_b
+ "subs %7, #2 \n" // w -= 2
+ "beq 4f \n"
+
+ // 1x8 block
+ "3: \n"
+ "vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
+ "vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
+ "vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
+ "vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
+ "vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
+ "vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
+ "vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
+ "vld2.8 {d0[7], d1[7]}, [%1] \n"
+
+ "vst1.64 {d0}, [%3] \n"
+ "vst1.64 {d1}, [%5] \n"
+
+ "4: \n"
+
+ : "=&r"(src_temp), // %0
+ "+r"(src), // %1
+ "+r"(src_stride), // %2
+ "+r"(dst_a), // %3
+ "+r"(dst_stride_a), // %4
+ "+r"(dst_b), // %5
+ "+r"(dst_stride_b), // %6
+ "+r"(width) // %7
+ : "r"(&kVTbl4x4TransposeDi) // %8
+ : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
+}
+#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon64.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon64.cc
new file mode 100644
index 0000000000..f469baacf6
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon64.cc
@@ -0,0 +1,426 @@
+/*
+ * Copyright 2014 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate_row.h"
+#include "libyuv/row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC Neon armv8 64 bit.
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13,
+ 2, 6, 10, 14, 3, 7, 11, 15};
+
+void TransposeWx8_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ const uint8_t* src_temp;
+ asm volatile(
+ // loops are on blocks of 8. loop will stop when
+ // counter gets to or below 0. starting the counter
+ // at w-8 allow for this
+ "sub %w3, %w3, #8 \n"
+
+ // handle 8x8 blocks. this should be the majority of the plane
+ "1: \n"
+ "mov %0, %1 \n"
+
+ "ld1 {v0.8b}, [%0], %5 \n"
+ "ld1 {v1.8b}, [%0], %5 \n"
+ "ld1 {v2.8b}, [%0], %5 \n"
+ "ld1 {v3.8b}, [%0], %5 \n"
+ "ld1 {v4.8b}, [%0], %5 \n"
+ "ld1 {v5.8b}, [%0], %5 \n"
+ "ld1 {v6.8b}, [%0], %5 \n"
+ "ld1 {v7.8b}, [%0] \n"
+
+ "trn2 v16.8b, v0.8b, v1.8b \n"
+ "trn1 v17.8b, v0.8b, v1.8b \n"
+ "trn2 v18.8b, v2.8b, v3.8b \n"
+ "trn1 v19.8b, v2.8b, v3.8b \n"
+ "trn2 v20.8b, v4.8b, v5.8b \n"
+ "trn1 v21.8b, v4.8b, v5.8b \n"
+ "trn2 v22.8b, v6.8b, v7.8b \n"
+ "trn1 v23.8b, v6.8b, v7.8b \n"
+
+ "trn2 v3.4h, v17.4h, v19.4h \n"
+ "trn1 v1.4h, v17.4h, v19.4h \n"
+ "trn2 v2.4h, v16.4h, v18.4h \n"
+ "trn1 v0.4h, v16.4h, v18.4h \n"
+ "trn2 v7.4h, v21.4h, v23.4h \n"
+ "trn1 v5.4h, v21.4h, v23.4h \n"
+ "trn2 v6.4h, v20.4h, v22.4h \n"
+ "trn1 v4.4h, v20.4h, v22.4h \n"
+
+ "trn2 v21.2s, v1.2s, v5.2s \n"
+ "trn1 v17.2s, v1.2s, v5.2s \n"
+ "trn2 v20.2s, v0.2s, v4.2s \n"
+ "trn1 v16.2s, v0.2s, v4.2s \n"
+ "trn2 v23.2s, v3.2s, v7.2s \n"
+ "trn1 v19.2s, v3.2s, v7.2s \n"
+ "trn2 v22.2s, v2.2s, v6.2s \n"
+ "trn1 v18.2s, v2.2s, v6.2s \n"
+
+ "mov %0, %2 \n"
+
+ "st1 {v17.8b}, [%0], %6 \n"
+ "st1 {v16.8b}, [%0], %6 \n"
+ "st1 {v19.8b}, [%0], %6 \n"
+ "st1 {v18.8b}, [%0], %6 \n"
+ "st1 {v21.8b}, [%0], %6 \n"
+ "st1 {v20.8b}, [%0], %6 \n"
+ "st1 {v23.8b}, [%0], %6 \n"
+ "st1 {v22.8b}, [%0] \n"
+
+ "add %1, %1, #8 \n" // src += 8
+ "add %2, %2, %6, lsl #3 \n" // dst += 8 * dst_stride
+ "subs %w3, %w3, #8 \n" // w -= 8
+ "b.ge 1b \n"
+
+ // add 8 back to counter. if the result is 0 there are
+ // no residuals.
+ "adds %w3, %w3, #8 \n"
+ "b.eq 4f \n"
+
+ // some residual, so between 1 and 7 lines left to transpose
+ "cmp %w3, #2 \n"
+ "b.lt 3f \n"
+
+ "cmp %w3, #4 \n"
+ "b.lt 2f \n"
+
+ // 4x8 block
+ "mov %0, %1 \n"
+ "ld1 {v0.s}[0], [%0], %5 \n"
+ "ld1 {v0.s}[1], [%0], %5 \n"
+ "ld1 {v0.s}[2], [%0], %5 \n"
+ "ld1 {v0.s}[3], [%0], %5 \n"
+ "ld1 {v1.s}[0], [%0], %5 \n"
+ "ld1 {v1.s}[1], [%0], %5 \n"
+ "ld1 {v1.s}[2], [%0], %5 \n"
+ "ld1 {v1.s}[3], [%0] \n"
+
+ "mov %0, %2 \n"
+
+ "ld1 {v2.16b}, [%4] \n"
+
+ "tbl v3.16b, {v0.16b}, v2.16b \n"
+ "tbl v0.16b, {v1.16b}, v2.16b \n"
+
+ // TODO(frkoenig): Rework shuffle above to
+ // write out with 4 instead of 8 writes.
+ "st1 {v3.s}[0], [%0], %6 \n"
+ "st1 {v3.s}[1], [%0], %6 \n"
+ "st1 {v3.s}[2], [%0], %6 \n"
+ "st1 {v3.s}[3], [%0] \n"
+
+ "add %0, %2, #4 \n"
+ "st1 {v0.s}[0], [%0], %6 \n"
+ "st1 {v0.s}[1], [%0], %6 \n"
+ "st1 {v0.s}[2], [%0], %6 \n"
+ "st1 {v0.s}[3], [%0] \n"
+
+ "add %1, %1, #4 \n" // src += 4
+ "add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride
+ "subs %w3, %w3, #4 \n" // w -= 4
+ "b.eq 4f \n"
+
+ // some residual, check to see if it includes a 2x8 block,
+ // or less
+ "cmp %w3, #2 \n"
+ "b.lt 3f \n"
+
+ // 2x8 block
+ "2: \n"
+ "mov %0, %1 \n"
+ "ld1 {v0.h}[0], [%0], %5 \n"
+ "ld1 {v1.h}[0], [%0], %5 \n"
+ "ld1 {v0.h}[1], [%0], %5 \n"
+ "ld1 {v1.h}[1], [%0], %5 \n"
+ "ld1 {v0.h}[2], [%0], %5 \n"
+ "ld1 {v1.h}[2], [%0], %5 \n"
+ "ld1 {v0.h}[3], [%0], %5 \n"
+ "ld1 {v1.h}[3], [%0] \n"
+
+ "trn2 v2.8b, v0.8b, v1.8b \n"
+ "trn1 v3.8b, v0.8b, v1.8b \n"
+
+ "mov %0, %2 \n"
+
+ "st1 {v3.8b}, [%0], %6 \n"
+ "st1 {v2.8b}, [%0] \n"
+
+ "add %1, %1, #2 \n" // src += 2
+ "add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride
+ "subs %w3, %w3, #2 \n" // w -= 2
+ "b.eq 4f \n"
+
+ // 1x8 block
+ "3: \n"
+ "ld1 {v0.b}[0], [%1], %5 \n"
+ "ld1 {v0.b}[1], [%1], %5 \n"
+ "ld1 {v0.b}[2], [%1], %5 \n"
+ "ld1 {v0.b}[3], [%1], %5 \n"
+ "ld1 {v0.b}[4], [%1], %5 \n"
+ "ld1 {v0.b}[5], [%1], %5 \n"
+ "ld1 {v0.b}[6], [%1], %5 \n"
+ "ld1 {v0.b}[7], [%1] \n"
+
+ "st1 {v0.8b}, [%2] \n"
+
+ "4: \n"
+
+ : "=&r"(src_temp), // %0
+ "+r"(src), // %1
+ "+r"(dst), // %2
+ "+r"(width) // %3
+ : "r"(&kVTbl4x4Transpose), // %4
+ "r"(static_cast<ptrdiff_t>(src_stride)), // %5
+ "r"(static_cast<ptrdiff_t>(dst_stride)) // %6
+ : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
+ "v17", "v18", "v19", "v20", "v21", "v22", "v23");
+}
+
+static const uint8_t kVTbl4x4TransposeDi[32] = {
+ 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
+ 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
+
+void TransposeUVWx8_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width) {
+ const uint8_t* src_temp;
+ asm volatile(
+ // loops are on blocks of 8. loop will stop when
+ // counter gets to or below 0. starting the counter
+ // at w-8 allow for this
+ "sub %w4, %w4, #8 \n"
+
+ // handle 8x8 blocks. this should be the majority of the plane
+ "1: \n"
+ "mov %0, %1 \n"
+
+ "ld1 {v0.16b}, [%0], %5 \n"
+ "ld1 {v1.16b}, [%0], %5 \n"
+ "ld1 {v2.16b}, [%0], %5 \n"
+ "ld1 {v3.16b}, [%0], %5 \n"
+ "ld1 {v4.16b}, [%0], %5 \n"
+ "ld1 {v5.16b}, [%0], %5 \n"
+ "ld1 {v6.16b}, [%0], %5 \n"
+ "ld1 {v7.16b}, [%0] \n"
+
+ "trn1 v16.16b, v0.16b, v1.16b \n"
+ "trn2 v17.16b, v0.16b, v1.16b \n"
+ "trn1 v18.16b, v2.16b, v3.16b \n"
+ "trn2 v19.16b, v2.16b, v3.16b \n"
+ "trn1 v20.16b, v4.16b, v5.16b \n"
+ "trn2 v21.16b, v4.16b, v5.16b \n"
+ "trn1 v22.16b, v6.16b, v7.16b \n"
+ "trn2 v23.16b, v6.16b, v7.16b \n"
+
+ "trn1 v0.8h, v16.8h, v18.8h \n"
+ "trn2 v1.8h, v16.8h, v18.8h \n"
+ "trn1 v2.8h, v20.8h, v22.8h \n"
+ "trn2 v3.8h, v20.8h, v22.8h \n"
+ "trn1 v4.8h, v17.8h, v19.8h \n"
+ "trn2 v5.8h, v17.8h, v19.8h \n"
+ "trn1 v6.8h, v21.8h, v23.8h \n"
+ "trn2 v7.8h, v21.8h, v23.8h \n"
+
+ "trn1 v16.4s, v0.4s, v2.4s \n"
+ "trn2 v17.4s, v0.4s, v2.4s \n"
+ "trn1 v18.4s, v1.4s, v3.4s \n"
+ "trn2 v19.4s, v1.4s, v3.4s \n"
+ "trn1 v20.4s, v4.4s, v6.4s \n"
+ "trn2 v21.4s, v4.4s, v6.4s \n"
+ "trn1 v22.4s, v5.4s, v7.4s \n"
+ "trn2 v23.4s, v5.4s, v7.4s \n"
+
+ "mov %0, %2 \n"
+
+ "st1 {v16.d}[0], [%0], %6 \n"
+ "st1 {v18.d}[0], [%0], %6 \n"
+ "st1 {v17.d}[0], [%0], %6 \n"
+ "st1 {v19.d}[0], [%0], %6 \n"
+ "st1 {v16.d}[1], [%0], %6 \n"
+ "st1 {v18.d}[1], [%0], %6 \n"
+ "st1 {v17.d}[1], [%0], %6 \n"
+ "st1 {v19.d}[1], [%0] \n"
+
+ "mov %0, %3 \n"
+
+ "st1 {v20.d}[0], [%0], %7 \n"
+ "st1 {v22.d}[0], [%0], %7 \n"
+ "st1 {v21.d}[0], [%0], %7 \n"
+ "st1 {v23.d}[0], [%0], %7 \n"
+ "st1 {v20.d}[1], [%0], %7 \n"
+ "st1 {v22.d}[1], [%0], %7 \n"
+ "st1 {v21.d}[1], [%0], %7 \n"
+ "st1 {v23.d}[1], [%0] \n"
+
+ "add %1, %1, #16 \n" // src += 8*2
+ "add %2, %2, %6, lsl #3 \n" // dst_a += 8 *
+ // dst_stride_a
+ "add %3, %3, %7, lsl #3 \n" // dst_b += 8 *
+ // dst_stride_b
+ "subs %w4, %w4, #8 \n" // w -= 8
+ "b.ge 1b \n"
+
+ // add 8 back to counter. if the result is 0 there are
+ // no residuals.
+ "adds %w4, %w4, #8 \n"
+ "b.eq 4f \n"
+
+ // some residual, so between 1 and 7 lines left to transpose
+ "cmp %w4, #2 \n"
+ "b.lt 3f \n"
+
+ "cmp %w4, #4 \n"
+ "b.lt 2f \n"
+
+ // TODO(frkoenig): Clean this up
+ // 4x8 block
+ "mov %0, %1 \n"
+ "ld1 {v0.8b}, [%0], %5 \n"
+ "ld1 {v1.8b}, [%0], %5 \n"
+ "ld1 {v2.8b}, [%0], %5 \n"
+ "ld1 {v3.8b}, [%0], %5 \n"
+ "ld1 {v4.8b}, [%0], %5 \n"
+ "ld1 {v5.8b}, [%0], %5 \n"
+ "ld1 {v6.8b}, [%0], %5 \n"
+ "ld1 {v7.8b}, [%0] \n"
+
+ "ld1 {v30.16b}, [%8], #16 \n"
+ "ld1 {v31.16b}, [%8] \n"
+
+ "tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n"
+ "tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n"
+ "tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n"
+ "tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n"
+
+ "mov %0, %2 \n"
+
+ "st1 {v16.s}[0], [%0], %6 \n"
+ "st1 {v16.s}[1], [%0], %6 \n"
+ "st1 {v16.s}[2], [%0], %6 \n"
+ "st1 {v16.s}[3], [%0], %6 \n"
+
+ "add %0, %2, #4 \n"
+ "st1 {v18.s}[0], [%0], %6 \n"
+ "st1 {v18.s}[1], [%0], %6 \n"
+ "st1 {v18.s}[2], [%0], %6 \n"
+ "st1 {v18.s}[3], [%0] \n"
+
+ "mov %0, %3 \n"
+
+ "st1 {v17.s}[0], [%0], %7 \n"
+ "st1 {v17.s}[1], [%0], %7 \n"
+ "st1 {v17.s}[2], [%0], %7 \n"
+ "st1 {v17.s}[3], [%0], %7 \n"
+
+ "add %0, %3, #4 \n"
+ "st1 {v19.s}[0], [%0], %7 \n"
+ "st1 {v19.s}[1], [%0], %7 \n"
+ "st1 {v19.s}[2], [%0], %7 \n"
+ "st1 {v19.s}[3], [%0] \n"
+
+ "add %1, %1, #8 \n" // src += 4 * 2
+ "add %2, %2, %6, lsl #2 \n" // dst_a += 4 *
+ // dst_stride_a
+ "add %3, %3, %7, lsl #2 \n" // dst_b += 4 *
+ // dst_stride_b
+ "subs %w4, %w4, #4 \n" // w -= 4
+ "b.eq 4f \n"
+
+ // some residual, check to see if it includes a 2x8 block,
+ // or less
+ "cmp %w4, #2 \n"
+ "b.lt 3f \n"
+
+ // 2x8 block
+ "2: \n"
+ "mov %0, %1 \n"
+ "ld2 {v0.h, v1.h}[0], [%0], %5 \n"
+ "ld2 {v2.h, v3.h}[0], [%0], %5 \n"
+ "ld2 {v0.h, v1.h}[1], [%0], %5 \n"
+ "ld2 {v2.h, v3.h}[1], [%0], %5 \n"
+ "ld2 {v0.h, v1.h}[2], [%0], %5 \n"
+ "ld2 {v2.h, v3.h}[2], [%0], %5 \n"
+ "ld2 {v0.h, v1.h}[3], [%0], %5 \n"
+ "ld2 {v2.h, v3.h}[3], [%0] \n"
+
+ "trn1 v4.8b, v0.8b, v2.8b \n"
+ "trn2 v5.8b, v0.8b, v2.8b \n"
+ "trn1 v6.8b, v1.8b, v3.8b \n"
+ "trn2 v7.8b, v1.8b, v3.8b \n"
+
+ "mov %0, %2 \n"
+
+ "st1 {v4.d}[0], [%0], %6 \n"
+ "st1 {v6.d}[0], [%0] \n"
+
+ "mov %0, %3 \n"
+
+ "st1 {v5.d}[0], [%0], %7 \n"
+ "st1 {v7.d}[0], [%0] \n"
+
+ "add %1, %1, #4 \n" // src += 2 * 2
+ "add %2, %2, %6, lsl #1 \n" // dst_a += 2 *
+ // dst_stride_a
+ "add %3, %3, %7, lsl #1 \n" // dst_b += 2 *
+ // dst_stride_b
+ "subs %w4, %w4, #2 \n" // w -= 2
+ "b.eq 4f \n"
+
+ // 1x8 block
+ "3: \n"
+ "ld2 {v0.b, v1.b}[0], [%1], %5 \n"
+ "ld2 {v0.b, v1.b}[1], [%1], %5 \n"
+ "ld2 {v0.b, v1.b}[2], [%1], %5 \n"
+ "ld2 {v0.b, v1.b}[3], [%1], %5 \n"
+ "ld2 {v0.b, v1.b}[4], [%1], %5 \n"
+ "ld2 {v0.b, v1.b}[5], [%1], %5 \n"
+ "ld2 {v0.b, v1.b}[6], [%1], %5 \n"
+ "ld2 {v0.b, v1.b}[7], [%1] \n"
+
+ "st1 {v0.d}[0], [%2] \n"
+ "st1 {v1.d}[0], [%3] \n"
+
+ "4: \n"
+
+ : "=&r"(src_temp), // %0
+ "+r"(src), // %1
+ "+r"(dst_a), // %2
+ "+r"(dst_b), // %3
+ "+r"(width) // %4
+ : "r"(static_cast<ptrdiff_t>(src_stride)), // %5
+ "r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
+ "r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
+ "r"(&kVTbl4x4TransposeDi) // %8
+ : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
+ "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
+}
+#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_win.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_win.cc
new file mode 100644
index 0000000000..e887dd525c
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_win.cc
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate_row.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for 32 bit Visual C x86 and clangcl
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+
+__declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ __asm {
+ push edi
+ push esi
+ push ebp
+ mov eax, [esp + 12 + 4] // src
+ mov edi, [esp + 12 + 8] // src_stride
+ mov edx, [esp + 12 + 12] // dst
+ mov esi, [esp + 12 + 16] // dst_stride
+ mov ecx, [esp + 12 + 20] // width
+
+ // Read in the data from the source pointer.
+ // First round of bit swap.
+ align 4
+ convertloop:
+ movq xmm0, qword ptr [eax]
+ lea ebp, [eax + 8]
+ movq xmm1, qword ptr [eax + edi]
+ lea eax, [eax + 2 * edi]
+ punpcklbw xmm0, xmm1
+ movq xmm2, qword ptr [eax]
+ movdqa xmm1, xmm0
+ palignr xmm1, xmm1, 8
+ movq xmm3, qword ptr [eax + edi]
+ lea eax, [eax + 2 * edi]
+ punpcklbw xmm2, xmm3
+ movdqa xmm3, xmm2
+ movq xmm4, qword ptr [eax]
+ palignr xmm3, xmm3, 8
+ movq xmm5, qword ptr [eax + edi]
+ punpcklbw xmm4, xmm5
+ lea eax, [eax + 2 * edi]
+ movdqa xmm5, xmm4
+ movq xmm6, qword ptr [eax]
+ palignr xmm5, xmm5, 8
+ movq xmm7, qword ptr [eax + edi]
+ punpcklbw xmm6, xmm7
+ mov eax, ebp
+ movdqa xmm7, xmm6
+ palignr xmm7, xmm7, 8
+ // Second round of bit swap.
+ punpcklwd xmm0, xmm2
+ punpcklwd xmm1, xmm3
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ palignr xmm2, xmm2, 8
+ palignr xmm3, xmm3, 8
+ punpcklwd xmm4, xmm6
+ punpcklwd xmm5, xmm7
+ movdqa xmm6, xmm4
+ movdqa xmm7, xmm5
+ palignr xmm6, xmm6, 8
+ palignr xmm7, xmm7, 8
+ // Third round of bit swap.
+ // Write to the destination pointer.
+ punpckldq xmm0, xmm4
+ movq qword ptr [edx], xmm0
+ movdqa xmm4, xmm0
+ palignr xmm4, xmm4, 8
+ movq qword ptr [edx + esi], xmm4
+ lea edx, [edx + 2 * esi]
+ punpckldq xmm2, xmm6
+ movdqa xmm6, xmm2
+ palignr xmm6, xmm6, 8
+ movq qword ptr [edx], xmm2
+ punpckldq xmm1, xmm5
+ movq qword ptr [edx + esi], xmm6
+ lea edx, [edx + 2 * esi]
+ movdqa xmm5, xmm1
+ movq qword ptr [edx], xmm1
+ palignr xmm5, xmm5, 8
+ punpckldq xmm3, xmm7
+ movq qword ptr [edx + esi], xmm5
+ lea edx, [edx + 2 * esi]
+ movq qword ptr [edx], xmm3
+ movdqa xmm7, xmm3
+ palignr xmm7, xmm7, 8
+ sub ecx, 8
+ movq qword ptr [edx + esi], xmm7
+ lea edx, [edx + 2 * esi]
+ jg convertloop
+
+ pop ebp
+ pop esi
+ pop edi
+ ret
+ }
+}
+
+__declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int w) {
+ __asm {
+ push ebx
+ push esi
+ push edi
+ push ebp
+ mov eax, [esp + 16 + 4] // src
+ mov edi, [esp + 16 + 8] // src_stride
+ mov edx, [esp + 16 + 12] // dst_a
+ mov esi, [esp + 16 + 16] // dst_stride_a
+ mov ebx, [esp + 16 + 20] // dst_b
+ mov ebp, [esp + 16 + 24] // dst_stride_b
+ mov ecx, esp
+ sub esp, 4 + 16
+ and esp, ~15
+ mov [esp + 16], ecx
+ mov ecx, [ecx + 16 + 28] // w
+
+ align 4
+ // Read in the data from the source pointer.
+ // First round of bit swap.
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + edi]
+ lea eax, [eax + 2 * edi]
+ movdqa xmm7, xmm0 // use xmm7 as temp register.
+ punpcklbw xmm0, xmm1
+ punpckhbw xmm7, xmm1
+ movdqa xmm1, xmm7
+ movdqu xmm2, [eax]
+ movdqu xmm3, [eax + edi]
+ lea eax, [eax + 2 * edi]
+ movdqa xmm7, xmm2
+ punpcklbw xmm2, xmm3
+ punpckhbw xmm7, xmm3
+ movdqa xmm3, xmm7
+ movdqu xmm4, [eax]
+ movdqu xmm5, [eax + edi]
+ lea eax, [eax + 2 * edi]
+ movdqa xmm7, xmm4
+ punpcklbw xmm4, xmm5
+ punpckhbw xmm7, xmm5
+ movdqa xmm5, xmm7
+ movdqu xmm6, [eax]
+ movdqu xmm7, [eax + edi]
+ lea eax, [eax + 2 * edi]
+ movdqu [esp], xmm5 // backup xmm5
+ neg edi
+ movdqa xmm5, xmm6 // use xmm5 as temp register.
+ punpcklbw xmm6, xmm7
+ punpckhbw xmm5, xmm7
+ movdqa xmm7, xmm5
+ lea eax, [eax + 8 * edi + 16]
+ neg edi
+ // Second round of bit swap.
+ movdqa xmm5, xmm0
+ punpcklwd xmm0, xmm2
+ punpckhwd xmm5, xmm2
+ movdqa xmm2, xmm5
+ movdqa xmm5, xmm1
+ punpcklwd xmm1, xmm3
+ punpckhwd xmm5, xmm3
+ movdqa xmm3, xmm5
+ movdqa xmm5, xmm4
+ punpcklwd xmm4, xmm6
+ punpckhwd xmm5, xmm6
+ movdqa xmm6, xmm5
+ movdqu xmm5, [esp] // restore xmm5
+ movdqu [esp], xmm6 // backup xmm6
+ movdqa xmm6, xmm5 // use xmm6 as temp register.
+ punpcklwd xmm5, xmm7
+ punpckhwd xmm6, xmm7
+ movdqa xmm7, xmm6
+
+ // Third round of bit swap.
+ // Write to the destination pointer.
+ movdqa xmm6, xmm0
+ punpckldq xmm0, xmm4
+ punpckhdq xmm6, xmm4
+ movdqa xmm4, xmm6
+ movdqu xmm6, [esp] // restore xmm6
+ movlpd qword ptr [edx], xmm0
+ movhpd qword ptr [ebx], xmm0
+ movlpd qword ptr [edx + esi], xmm4
+ lea edx, [edx + 2 * esi]
+ movhpd qword ptr [ebx + ebp], xmm4
+ lea ebx, [ebx + 2 * ebp]
+ movdqa xmm0, xmm2 // use xmm0 as the temp register.
+ punpckldq xmm2, xmm6
+ movlpd qword ptr [edx], xmm2
+ movhpd qword ptr [ebx], xmm2
+ punpckhdq xmm0, xmm6
+ movlpd qword ptr [edx + esi], xmm0
+ lea edx, [edx + 2 * esi]
+ movhpd qword ptr [ebx + ebp], xmm0
+ lea ebx, [ebx + 2 * ebp]
+ movdqa xmm0, xmm1 // use xmm0 as the temp register.
+ punpckldq xmm1, xmm5
+ movlpd qword ptr [edx], xmm1
+ movhpd qword ptr [ebx], xmm1
+ punpckhdq xmm0, xmm5
+ movlpd qword ptr [edx + esi], xmm0
+ lea edx, [edx + 2 * esi]
+ movhpd qword ptr [ebx + ebp], xmm0
+ lea ebx, [ebx + 2 * ebp]
+ movdqa xmm0, xmm3 // use xmm0 as the temp register.
+ punpckldq xmm3, xmm7
+ movlpd qword ptr [edx], xmm3
+ movhpd qword ptr [ebx], xmm3
+ punpckhdq xmm0, xmm7
+ sub ecx, 8
+ movlpd qword ptr [edx + esi], xmm0
+ lea edx, [edx + 2 * esi]
+ movhpd qword ptr [ebx + ebp], xmm0
+ lea ebx, [ebx + 2 * ebp]
+ jg convertloop
+
+ mov esp, [esp + 16]
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
+ ret
+ }
+}
+
+#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_any.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_any.cc
new file mode 100644
index 0000000000..e91560c44c
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/row_any.cc
@@ -0,0 +1,1211 @@
+/*
+ * Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#include <string.h> // For memset.
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// memset for temp is meant to clear the source buffer (not dest) so that
+// SIMD that reads full multiple of 16 bytes will not trigger msan errors.
+// memset is not needed for production, as the garbage values are processed but
+// not used, although there may be edge cases for subsampling.
+// The size of the buffer is based on the largest read, which can be inferred
+// by the source type (e.g. ARGB) and the mask (last parameter), or by examining
+// the source code for how much the source pointers are advanced.
+
+// Subsampled source needs to be increase by 1 of not even.
+#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
+
+// Any 4 planes to 1 with yuvconstants
+#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
+ const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 5]); \
+ memset(temp, 0, 64 * 4); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n, r); \
+ memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 192, a_buf + n, r); \
+ ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
+ yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
+ SS(r, DUVSHIFT) * BPP); \
+ }
+
+#ifdef HAS_I422ALPHATOARGBROW_SSSE3
+ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
+#endif
+#ifdef HAS_I422ALPHATOARGBROW_AVX2
+ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I422ALPHATOARGBROW_NEON
+ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
+#endif
+#ifdef HAS_I422ALPHATOARGBROW_MSA
+ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
+#endif
+#undef ANY41C
+
+// Any 3 planes to 1.
+#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
+ const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 4]); \
+ memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
+ } \
+ memcpy(temp, y_buf + n, r); \
+ memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
+ SS(r, DUVSHIFT) * BPP); \
+ }
+
+// Merge functions.
+#ifdef HAS_MERGERGBROW_SSSE3
+ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15)
+#endif
+#ifdef HAS_MERGERGBROW_NEON
+ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
+#endif
+#ifdef HAS_I422TOYUY2ROW_SSE2
+ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
+ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
+#endif
+#ifdef HAS_I422TOYUY2ROW_AVX2
+ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31)
+ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31)
+#endif
+#ifdef HAS_I422TOYUY2ROW_NEON
+ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
+#endif
+#ifdef HAS_I422TOYUY2ROW_MSA
+ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
+#endif
+#ifdef HAS_I422TOUYVYROW_NEON
+ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
+#endif
+#ifdef HAS_I422TOUYVYROW_MSA
+ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
+#endif
+#ifdef HAS_BLENDPLANEROW_AVX2
+ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
+#endif
+#ifdef HAS_BLENDPLANEROW_SSSE3
+ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
+#endif
+#undef ANY31
+
+// Note that odd width replication includes 444 due to implementation
+// on arm that subsamples 444 to 422 internally.
+// Any 3 planes to 1 with yuvconstants
+#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
+ const uint8_t* v_buf, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t temp[128 * 4]); \
+ memset(temp, 0, 128 * 3); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n, r); \
+ memcpy(temp + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ if (width & 1) { \
+ temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \
+ temp[256 + SS(r, UVSHIFT)] = temp[256 + SS(r, UVSHIFT) - 1]; \
+ } \
+ ANY_SIMD(temp, temp + 128, temp + 256, temp + 384, yuvconstants, \
+ MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 384, \
+ SS(r, DUVSHIFT) * BPP); \
+ }
+
+#ifdef HAS_I422TOARGBROW_SSSE3
+ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
+#endif
+#ifdef HAS_I422TOAR30ROW_SSSE3
+ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7)
+#endif
+#ifdef HAS_I422TOAR30ROW_AVX2
+ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I444TOARGBROW_SSSE3
+ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
+ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
+ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15)
+#endif // HAS_I444TOARGBROW_SSSE3
+#ifdef HAS_I422TORGB24ROW_AVX2
+ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
+#endif
+#ifdef HAS_I422TOARGBROW_AVX2
+ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I422TORGBAROW_AVX2
+ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I444TOARGBROW_AVX2
+ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
+#endif
+#ifdef HAS_I422TOARGB4444ROW_AVX2
+ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15)
+#endif
+#ifdef HAS_I422TOARGB1555ROW_AVX2
+ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15)
+#endif
+#ifdef HAS_I422TORGB565ROW_AVX2
+ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15)
+#endif
+#ifdef HAS_I422TOARGBROW_NEON
+ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
+ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
+ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
+ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
+ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
+ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
+ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
+#endif
+#ifdef HAS_I422TOARGBROW_MSA
+ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
+ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
+ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
+ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15)
+ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
+ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
+ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
+#endif
+#undef ANY31C
+
+// Any 3 planes of 16 bit to 1 with yuvconstants
+// TODO(fbarchard): consider sharing this code with ANY31C
+#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
+ void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \
+ uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
+ int width) { \
+ SIMD_ALIGNED(T temp[16 * 3]); \
+ SIMD_ALIGNED(uint8_t out[64]); \
+ memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n, r * SBPP); \
+ memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
+ }
+
+#ifdef HAS_I210TOAR30ROW_SSSE3
+ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
+#endif
+#ifdef HAS_I210TOARGBROW_SSSE3
+ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
+#endif
+#ifdef HAS_I210TOARGBROW_AVX2
+ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
+#endif
+#ifdef HAS_I210TOAR30ROW_AVX2
+ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
+#endif
+#undef ANY31CT
+
+// Any 2 planes to 1.
+#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
+ int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 3]); \
+ memset(temp, 0, 64 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
+ } \
+ memcpy(temp, y_buf + n * SBPP, r * SBPP); \
+ memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ SS(r, UVSHIFT) * SBPP2); \
+ ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+ }
+
+// Merge functions.
+#ifdef HAS_MERGEUVROW_SSE2
+ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
+#endif
+#ifdef HAS_MERGEUVROW_AVX2
+ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
+#endif
+#ifdef HAS_MERGEUVROW_NEON
+ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
+#endif
+#ifdef HAS_MERGEUVROW_MSA
+ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
+#endif
+
+// Math functions.
+#ifdef HAS_ARGBMULTIPLYROW_SSE2
+ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
+#endif
+#ifdef HAS_ARGBADDROW_SSE2
+ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
+#endif
+#ifdef HAS_ARGBSUBTRACTROW_SSE2
+ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
+#endif
+#ifdef HAS_ARGBMULTIPLYROW_AVX2
+ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBADDROW_AVX2
+ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBSUBTRACTROW_AVX2
+ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBMULTIPLYROW_NEON
+ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBADDROW_NEON
+ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBSUBTRACTROW_NEON
+ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBMULTIPLYROW_MSA
+ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
+#endif
+#ifdef HAS_ARGBADDROW_MSA
+ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBSUBTRACTROW_MSA
+ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
+#endif
+#ifdef HAS_SOBELROW_SSE2
+ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
+#endif
+#ifdef HAS_SOBELROW_NEON
+ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
+#endif
+#ifdef HAS_SOBELROW_MSA
+ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
+#endif
+#ifdef HAS_SOBELTOPLANEROW_SSE2
+ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
+#endif
+#ifdef HAS_SOBELTOPLANEROW_NEON
+ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
+#endif
+#ifdef HAS_SOBELTOPLANEROW_MSA
+ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
+#endif
+#ifdef HAS_SOBELXYROW_SSE2
+ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
+#endif
+#ifdef HAS_SOBELXYROW_NEON
+ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
+#endif
+#ifdef HAS_SOBELXYROW_MSA
+ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
+#endif
+#undef ANY21
+
+// Any 2 planes to 1 with yuvconstants
+#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t temp[128 * 3]); \
+ memset(temp, 0, 128 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n * SBPP, r * SBPP); \
+ memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ SS(r, UVSHIFT) * SBPP2); \
+ ANY_SIMD(temp, temp + 128, temp + 256, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 256, r * BPP); \
+ }
+
+// Biplanar to RGB.
+#ifdef HAS_NV12TOARGBROW_SSSE3
+ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV12TOARGBROW_AVX2
+ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
+#endif
+#ifdef HAS_NV12TOARGBROW_NEON
+ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV12TOARGBROW_MSA
+ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV21TOARGBROW_SSSE3
+ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV21TOARGBROW_AVX2
+ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
+#endif
+#ifdef HAS_NV21TOARGBROW_NEON
+ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV21TOARGBROW_MSA
+ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV12TORGB24ROW_NEON
+ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
+#endif
+#ifdef HAS_NV21TORGB24ROW_NEON
+ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7)
+#endif
+#ifdef HAS_NV12TORGB24ROW_SSSE3
+ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
+#endif
+#ifdef HAS_NV21TORGB24ROW_SSSE3
+ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
+#endif
+#ifdef HAS_NV12TORGB24ROW_AVX2
+ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
+#endif
+#ifdef HAS_NV21TORGB24ROW_AVX2
+ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
+#endif
+#ifdef HAS_NV12TORGB565ROW_SSSE3
+ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
+#endif
+#ifdef HAS_NV12TORGB565ROW_AVX2
+ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
+#endif
+#ifdef HAS_NV12TORGB565ROW_NEON
+ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
+#endif
+#ifdef HAS_NV12TORGB565ROW_MSA
+ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
+#endif
+#undef ANY21C
+
+// Any 1 to 1.
+#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t temp[128 * 2]); \
+ memset(temp, 0, 128); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(temp, temp + 128, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+ }
+
+#ifdef HAS_COPYROW_AVX
+ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
+#endif
+#ifdef HAS_COPYROW_SSE2
+ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
+#endif
+#ifdef HAS_COPYROW_NEON
+ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
+#endif
+#if defined(HAS_ARGBTORGB24ROW_SSSE3)
+ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
+ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
+ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
+ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
+ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
+#endif
+#if defined(HAS_ARGBTORGB24ROW_AVX2)
+ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31)
+#endif
+#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
+ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31)
+#endif
+#if defined(HAS_ARGBTORAWROW_AVX2)
+ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31)
+#endif
+#if defined(HAS_ARGBTORGB565ROW_AVX2)
+ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
+#endif
+#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
+ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
+ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
+#endif
+#if defined(HAS_ABGRTOAR30ROW_SSSE3)
+ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3)
+#endif
+#if defined(HAS_ARGBTOAR30ROW_SSSE3)
+ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3)
+#endif
+#if defined(HAS_ABGRTOAR30ROW_AVX2)
+ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7)
+#endif
+#if defined(HAS_ARGBTOAR30ROW_AVX2)
+ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
+#endif
+#if defined(HAS_J400TOARGBROW_SSE2)
+ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
+#endif
+#if defined(HAS_J400TOARGBROW_AVX2)
+ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
+#endif
+#if defined(HAS_I400TOARGBROW_SSE2)
+ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
+#endif
+#if defined(HAS_I400TOARGBROW_AVX2)
+ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
+#endif
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
+ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
+ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
+ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
+ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
+ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
+#endif
+#if defined(HAS_RAWTORGB24ROW_SSSE3)
+ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
+#endif
+#if defined(HAS_RGB565TOARGBROW_AVX2)
+ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
+#endif
+#if defined(HAS_ARGB1555TOARGBROW_AVX2)
+ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
+#endif
+#if defined(HAS_ARGB4444TOARGBROW_AVX2)
+ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
+#endif
+#if defined(HAS_ARGBTORGB24ROW_NEON)
+ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
+ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
+ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
+ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
+ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
+ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
+ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
+#endif
+#if defined(HAS_ARGBTORGB24ROW_MSA)
+ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
+ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15)
+ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7)
+ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
+ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
+ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
+ANY11(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, 0, 1, 4, 15)
+#endif
+#if defined(HAS_RAWTORGB24ROW_NEON)
+ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
+#endif
+#if defined(HAS_RAWTORGB24ROW_MSA)
+ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15)
+#endif
+#ifdef HAS_ARGBTOYROW_AVX2
+ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
+#endif
+#ifdef HAS_ARGBTOYJROW_AVX2
+ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
+#endif
+#ifdef HAS_UYVYTOYROW_AVX2
+ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
+#endif
+#ifdef HAS_YUY2TOYROW_AVX2
+ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
+#endif
+#ifdef HAS_ARGBTOYROW_SSSE3
+ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
+#endif
+#ifdef HAS_BGRATOYROW_SSSE3
+ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
+ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
+ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
+ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
+ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
+#endif
+#ifdef HAS_ARGBTOYJROW_SSSE3
+ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
+#endif
+#ifdef HAS_ARGBTOYROW_NEON
+ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7)
+#endif
+#ifdef HAS_ARGBTOYROW_MSA
+ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
+#endif
+#ifdef HAS_ARGBTOYJROW_NEON
+ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7)
+#endif
+#ifdef HAS_ARGBTOYJROW_MSA
+ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
+#endif
+#ifdef HAS_BGRATOYROW_NEON
+ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
+#endif
+#ifdef HAS_BGRATOYROW_MSA
+ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
+#endif
+#ifdef HAS_ABGRTOYROW_NEON
+ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7)
+#endif
+#ifdef HAS_ABGRTOYROW_MSA
+ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
+#endif
+#ifdef HAS_RGBATOYROW_NEON
+ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
+#endif
+#ifdef HAS_RGBATOYROW_MSA
+ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
+#endif
+#ifdef HAS_RGB24TOYROW_NEON
+ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
+#endif
+#ifdef HAS_RGB24TOYROW_MSA
+ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
+#endif
+#ifdef HAS_RAWTOYROW_NEON
+ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
+#endif
+#ifdef HAS_RAWTOYROW_MSA
+ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
+#endif
+#ifdef HAS_RGB565TOYROW_NEON
+ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
+#endif
+#ifdef HAS_RGB565TOYROW_MSA
+ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
+#endif
+#ifdef HAS_ARGB1555TOYROW_NEON
+ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
+#endif
+#ifdef HAS_ARGB1555TOYROW_MSA
+ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
+#endif
+#ifdef HAS_ARGB4444TOYROW_NEON
+ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
+#endif
+#ifdef HAS_YUY2TOYROW_NEON
+ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
+#endif
+#ifdef HAS_UYVYTOYROW_NEON
+ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
+#endif
+#ifdef HAS_YUY2TOYROW_MSA
+ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
+#endif
+#ifdef HAS_UYVYTOYROW_MSA
+ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
+#endif
+#ifdef HAS_RGB24TOARGBROW_NEON
+ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
+#endif
+#ifdef HAS_RGB24TOARGBROW_MSA
+ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
+#endif
+#ifdef HAS_RAWTOARGBROW_NEON
+ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
+#endif
+#ifdef HAS_RAWTOARGBROW_MSA
+ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
+#endif
+#ifdef HAS_RGB565TOARGBROW_NEON
+ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
+#endif
+#ifdef HAS_RGB565TOARGBROW_MSA
+ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
+#endif
+#ifdef HAS_ARGB1555TOARGBROW_NEON
+ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
+#endif
+#ifdef HAS_ARGB1555TOARGBROW_MSA
+ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
+#endif
+#ifdef HAS_ARGB4444TOARGBROW_NEON
+ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
+#endif
+#ifdef HAS_ARGB4444TOARGBROW_MSA
+ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
+#endif
+#ifdef HAS_ARGBATTENUATEROW_SSSE3
+ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
+#endif
+#ifdef HAS_ARGBUNATTENUATEROW_SSE2
+ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
+#endif
+#ifdef HAS_ARGBATTENUATEROW_AVX2
+ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBUNATTENUATEROW_AVX2
+ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBATTENUATEROW_NEON
+ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBATTENUATEROW_MSA
+ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
+ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
+#endif
+#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
+ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31)
+#endif
+#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
+ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
+#endif
+#ifdef HAS_ARGBEXTRACTALPHAROW_MSA
+ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15)
+#endif
+#undef ANY11
+
+// Any 1 to 1 blended. Destination is read, modify, write.
+#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 2]); \
+ memset(temp, 0, 64 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ memcpy(temp + 64, dst_ptr + n * BPP, r * BPP); \
+ ANY_SIMD(temp, temp + 64, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
+ }
+
+#ifdef HAS_ARGBCOPYALPHAROW_AVX2
+ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
+#endif
+#ifdef HAS_ARGBCOPYALPHAROW_SSE2
+ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
+ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
+#endif
+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
+ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
+#endif
+#undef ANY11B
+
+// Any 1 to 1 with parameter.
+#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 2]); \
+ memset(temp, 0, 64); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, param, n); \
+ } \
+ memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
+ ANY_SIMD(temp, temp + 64, param, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
+ }
+
+#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
+ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
+ ARGBToRGB565DitherRow_SSE2,
+ const uint32_t,
+ 4,
+ 2,
+ 3)
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
+ANY11P(ARGBToRGB565DitherRow_Any_AVX2,
+ ARGBToRGB565DitherRow_AVX2,
+ const uint32_t,
+ 4,
+ 2,
+ 7)
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
+ANY11P(ARGBToRGB565DitherRow_Any_NEON,
+ ARGBToRGB565DitherRow_NEON,
+ const uint32_t,
+ 4,
+ 2,
+ 7)
+#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
+ANY11P(ARGBToRGB565DitherRow_Any_MSA,
+ ARGBToRGB565DitherRow_MSA,
+ const uint32_t,
+ 4,
+ 2,
+ 7)
+#endif
+#ifdef HAS_ARGBSHUFFLEROW_SSSE3
+ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBSHUFFLEROW_AVX2
+ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
+#endif
+#ifdef HAS_ARGBSHUFFLEROW_NEON
+ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
+#endif
+#ifdef HAS_ARGBSHUFFLEROW_MSA
+ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
+#endif
+#undef ANY11P
+
+// Any 1 to 1 with parameter and shorts. BPP measures in shorts.
+#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
+ void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
+ SIMD_ALIGNED(STYPE temp[32]); \
+ SIMD_ALIGNED(DTYPE out[32]); \
+ memset(temp, 0, 32 * SBPP); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, scale, n); \
+ } \
+ memcpy(temp, src_ptr + n, r * SBPP); \
+ ANY_SIMD(temp, out, scale, MASK + 1); \
+ memcpy(dst_ptr + n, out, r * BPP); \
+ }
+
+#ifdef HAS_CONVERT16TO8ROW_SSSE3
+ANY11C(Convert16To8Row_Any_SSSE3,
+ Convert16To8Row_SSSE3,
+ 2,
+ 1,
+ uint16_t,
+ uint8_t,
+ 15)
+#endif
+#ifdef HAS_CONVERT16TO8ROW_AVX2
+ANY11C(Convert16To8Row_Any_AVX2,
+ Convert16To8Row_AVX2,
+ 2,
+ 1,
+ uint16_t,
+ uint8_t,
+ 31)
+#endif
+#ifdef HAS_CONVERT8TO16ROW_SSE2
+ANY11C(Convert8To16Row_Any_SSE2,
+ Convert8To16Row_SSE2,
+ 1,
+ 2,
+ uint8_t,
+ uint16_t,
+ 15)
+#endif
+#ifdef HAS_CONVERT8TO16ROW_AVX2
+ANY11C(Convert8To16Row_Any_AVX2,
+ Convert8To16Row_AVX2,
+ 1,
+ 2,
+ uint8_t,
+ uint16_t,
+ 31)
+#endif
+#undef ANY11C
+
+// Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts.
+#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK) \
+ void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \
+ SIMD_ALIGNED(ST temp[32]); \
+ SIMD_ALIGNED(T out[32]); \
+ memset(temp, 0, SBPP * 32); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, param, n); \
+ } \
+ memcpy(temp, src_ptr + n, r * SBPP); \
+ ANY_SIMD(temp, out, param, MASK + 1); \
+ memcpy(dst_ptr + n, out, r * BPP); \
+ }
+
+#ifdef HAS_HALFFLOATROW_SSE2
+ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, uint16_t, uint16_t, 2, 2, 7)
+#endif
+#ifdef HAS_HALFFLOATROW_AVX2
+ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, uint16_t, uint16_t, 2, 2, 15)
+#endif
+#ifdef HAS_HALFFLOATROW_F16C
+ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, uint16_t, uint16_t, 2, 2, 15)
+ANY11P16(HalfFloat1Row_Any_F16C,
+ HalfFloat1Row_F16C,
+ uint16_t,
+ uint16_t,
+ 2,
+ 2,
+ 15)
+#endif
+#ifdef HAS_HALFFLOATROW_NEON
+ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 7)
+ANY11P16(HalfFloat1Row_Any_NEON,
+ HalfFloat1Row_NEON,
+ uint16_t,
+ uint16_t,
+ 2,
+ 2,
+ 7)
+#endif
+#ifdef HAS_HALFFLOATROW_MSA
+ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31)
+#endif
+#ifdef HAS_BYTETOFLOATROW_NEON
+ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 3, 7)
+#endif
+#undef ANY11P16
+
+// Any 1 to 1 with yuvconstants
+#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t temp[128 * 2]); \
+ memset(temp, 0, 128); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+ }
+#if defined(HAS_YUY2TOARGBROW_SSSE3)
+ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
+ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
+#endif
+#if defined(HAS_YUY2TOARGBROW_AVX2)
+ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
+ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
+#endif
+#if defined(HAS_YUY2TOARGBROW_NEON)
+ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
+ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
+#endif
+#if defined(HAS_YUY2TOARGBROW_MSA)
+ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7)
+ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
+#endif
+#undef ANY11C
+
+// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
+#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
+ void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \
+ ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 3]); \
+ memset(temp, 0, 64 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
+ } \
+ memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
+ memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
+ ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+ }
+
+#ifdef HAS_INTERPOLATEROW_AVX2
+ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
+#endif
+#ifdef HAS_INTERPOLATEROW_SSSE3
+ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
+#endif
+#ifdef HAS_INTERPOLATEROW_NEON
+ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
+#endif
+#ifdef HAS_INTERPOLATEROW_MSA
+ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
+#endif
+#undef ANY11T
+
+// Any 1 to 1 mirror.
+#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64 * 2]); \
+ memset(temp, 0, 64); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
+ } \
+ memcpy(temp, src_ptr, r* BPP); \
+ ANY_SIMD(temp, temp + 64, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \
+ }
+
+#ifdef HAS_MIRRORROW_AVX2
+ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
+#endif
+#ifdef HAS_MIRRORROW_SSSE3
+ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
+#endif
+#ifdef HAS_MIRRORROW_NEON
+ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
+#endif
+#ifdef HAS_MIRRORROW_MSA
+ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
+#endif
+#ifdef HAS_ARGBMIRRORROW_AVX2
+ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
+#endif
+#ifdef HAS_ARGBMIRRORROW_SSE2
+ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
+#endif
+#ifdef HAS_ARGBMIRRORROW_NEON
+ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
+#endif
+#ifdef HAS_ARGBMIRRORROW_MSA
+ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
+#endif
+#undef ANY11M
+
+// Any 1 plane. (memset)
+#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
+ void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \
+ SIMD_ALIGNED(uint8_t temp[64]); \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(dst_ptr, v32, n); \
+ } \
+ ANY_SIMD(temp, v32, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp, r * BPP); \
+ }
+
+#ifdef HAS_SETROW_X86
+ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3)
+#endif
+#ifdef HAS_SETROW_NEON
+ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15)
+#endif
+#ifdef HAS_ARGBSETROW_NEON
+ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3)
+#endif
+#ifdef HAS_ARGBSETROW_MSA
+ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
+#endif
+#undef ANY1
+
+// Any 1 to 2. Outputs UV planes.
+#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
+ int width) { \
+ SIMD_ALIGNED(uint8_t temp[128 * 3]); \
+ memset(temp, 0, 128); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_u, dst_v, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
+ memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
+ memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
+ }
+
+#ifdef HAS_SPLITUVROW_SSE2
+ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
+#endif
+#ifdef HAS_SPLITUVROW_AVX2
+ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
+#endif
+#ifdef HAS_SPLITUVROW_NEON
+ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
+#endif
+#ifdef HAS_SPLITUVROW_MSA
+ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31)
+#endif
+#ifdef HAS_ARGBTOUV444ROW_SSSE3
+ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
+#endif
+#ifdef HAS_YUY2TOUV422ROW_AVX2
+ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
+ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
+#endif
+#ifdef HAS_YUY2TOUV422ROW_SSE2
+ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
+ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
+#endif
+#ifdef HAS_YUY2TOUV422ROW_NEON
+ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
+ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
+ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
+#endif
+#ifdef HAS_YUY2TOUV422ROW_MSA
+ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
+ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
+ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
+#endif
+#undef ANY12
+
+// Any 1 to 3. Outputs RGB planes.
+#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
+ uint8_t* dst_b, int width) { \
+ SIMD_ALIGNED(uint8_t temp[16 * 6]); \
+ memset(temp, 0, 16 * 3); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \
+ } \
+ memcpy(temp, src_ptr + n * BPP, r * BPP); \
+ ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \
+ memcpy(dst_r + n, temp + 16 * 3, r); \
+ memcpy(dst_g + n, temp + 16 * 4, r); \
+ memcpy(dst_b + n, temp + 16 * 5, r); \
+ }
+
+#ifdef HAS_SPLITRGBROW_SSSE3
+ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15)
+#endif
+#ifdef HAS_SPLITRGBROW_NEON
+ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
+#endif
+
+// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
+// 128 byte row allows for 32 avx ARGB pixels.
+#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, \
+ uint8_t* dst_v, int width) { \
+ SIMD_ALIGNED(uint8_t temp[128 * 4]); \
+ memset(temp, 0, 128 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
+ SS(r, UVSHIFT) * BPP); \
+ if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
+ memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
+ BPP); \
+ memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
+ temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ } \
+ ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \
+ memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \
+ memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \
+ }
+
+#ifdef HAS_ARGBTOUVROW_AVX2
+ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
+#endif
+#ifdef HAS_ARGBTOUVJROW_AVX2
+ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
+#endif
+#ifdef HAS_ARGBTOUVROW_SSSE3
+ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
+ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
+ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
+ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
+ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
+#endif
+#ifdef HAS_YUY2TOUVROW_AVX2
+ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
+ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
+#endif
+#ifdef HAS_YUY2TOUVROW_SSE2
+ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
+ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
+#endif
+#ifdef HAS_ARGBTOUVROW_NEON
+ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
+#endif
+#ifdef HAS_ARGBTOUVROW_MSA
+ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
+#endif
+#ifdef HAS_ARGBTOUVJROW_NEON
+ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
+#endif
+#ifdef HAS_ARGBTOUVJROW_MSA
+ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
+#endif
+#ifdef HAS_BGRATOUVROW_NEON
+ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
+#endif
+#ifdef HAS_BGRATOUVROW_MSA
+ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 31)
+#endif
+#ifdef HAS_ABGRTOUVROW_NEON
+ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
+#endif
+#ifdef HAS_ABGRTOUVROW_MSA
+ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 31)
+#endif
+#ifdef HAS_RGBATOUVROW_NEON
+ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
+#endif
+#ifdef HAS_RGBATOUVROW_MSA
+ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 31)
+#endif
+#ifdef HAS_RGB24TOUVROW_NEON
+ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
+#endif
+#ifdef HAS_RGB24TOUVROW_MSA
+ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
+#endif
+#ifdef HAS_RAWTOUVROW_NEON
+ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
+#endif
+#ifdef HAS_RAWTOUVROW_MSA
+ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
+#endif
+#ifdef HAS_RGB565TOUVROW_NEON
+ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
+#endif
+#ifdef HAS_RGB565TOUVROW_MSA
+ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
+#endif
+#ifdef HAS_ARGB1555TOUVROW_NEON
+ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
+#endif
+#ifdef HAS_ARGB1555TOUVROW_MSA
+ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
+#endif
+#ifdef HAS_ARGB4444TOUVROW_NEON
+ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
+#endif
+#ifdef HAS_YUY2TOUVROW_NEON
+ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
+#endif
+#ifdef HAS_UYVYTOUVROW_NEON
+ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
+#endif
+#ifdef HAS_YUY2TOUVROW_MSA
+ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
+#endif
+#ifdef HAS_UYVYTOUVROW_MSA
+ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
+#endif
+#undef ANY12S
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_common.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_common.cc
new file mode 100644
index 0000000000..2bbc5adbf1
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/row_common.cc
@@ -0,0 +1,3237 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#include <stdio.h>
+#include <string.h> // For memcpy and memset.
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// llvm x86 is poor at ternary operator, so use branchless min/max.
+
+#define USE_BRANCHLESS 1
+#if USE_BRANCHLESS
+static __inline int32_t clamp0(int32_t v) {
+ return ((-(v) >> 31) & (v));
+}
+
+static __inline int32_t clamp255(int32_t v) {
+ return (((255 - (v)) >> 31) | (v)) & 255;
+}
+
+static __inline int32_t clamp1023(int32_t v) {
+ return (((1023 - (v)) >> 31) | (v)) & 1023;
+}
+
+static __inline uint32_t Abs(int32_t v) {
+ int m = v >> 31;
+ return (v + m) ^ m;
+}
+#else // USE_BRANCHLESS
+static __inline int32_t clamp0(int32_t v) {
+ return (v < 0) ? 0 : v;
+}
+
+static __inline int32_t clamp255(int32_t v) {
+ return (v > 255) ? 255 : v;
+}
+
+static __inline int32_t clamp1023(int32_t v) {
+ return (v > 1023) ? 1023 : v;
+}
+
+static __inline uint32_t Abs(int32_t v) {
+ return (v < 0) ? -v : v;
+}
+#endif // USE_BRANCHLESS
+static __inline uint32_t Clamp(int32_t val) {
+ int v = clamp0(val);
+ return (uint32_t)(clamp255(v));
+}
+
+static __inline uint32_t Clamp10(int32_t val) {
+ int v = clamp0(val);
+ return (uint32_t)(clamp1023(v));
+}
+
+// Little Endian
+#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
+ defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \
+ (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define WRITEWORD(p, v) *(uint32_t*)(p) = v
+#else
+static inline void WRITEWORD(uint8_t* p, uint32_t v) {
+ p[0] = (uint8_t)(v & 255);
+ p[1] = (uint8_t)((v >> 8) & 255);
+ p[2] = (uint8_t)((v >> 16) & 255);
+ p[3] = (uint8_t)((v >> 24) & 255);
+}
+#endif
+
+void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_rgb24[0];
+ uint8_t g = src_rgb24[1];
+ uint8_t r = src_rgb24[2];
+ dst_argb[0] = b;
+ dst_argb[1] = g;
+ dst_argb[2] = r;
+ dst_argb[3] = 255u;
+ dst_argb += 4;
+ src_rgb24 += 3;
+ }
+}
+
+void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t r = src_raw[0];
+ uint8_t g = src_raw[1];
+ uint8_t b = src_raw[2];
+ dst_argb[0] = b;
+ dst_argb[1] = g;
+ dst_argb[2] = r;
+ dst_argb[3] = 255u;
+ dst_argb += 4;
+ src_raw += 3;
+ }
+}
+
+void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t r = src_raw[0];
+ uint8_t g = src_raw[1];
+ uint8_t b = src_raw[2];
+ dst_rgb24[0] = b;
+ dst_rgb24[1] = g;
+ dst_rgb24[2] = r;
+ dst_rgb24 += 3;
+ src_raw += 3;
+ }
+}
+
+void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_rgb565[0] & 0x1f;
+ uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+ uint8_t r = src_rgb565[1] >> 3;
+ dst_argb[0] = (b << 3) | (b >> 2);
+ dst_argb[1] = (g << 2) | (g >> 4);
+ dst_argb[2] = (r << 3) | (r >> 2);
+ dst_argb[3] = 255u;
+ dst_argb += 4;
+ src_rgb565 += 2;
+ }
+}
+
+void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb1555[0] & 0x1f;
+ uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+ uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
+ uint8_t a = src_argb1555[1] >> 7;
+ dst_argb[0] = (b << 3) | (b >> 2);
+ dst_argb[1] = (g << 3) | (g >> 2);
+ dst_argb[2] = (r << 3) | (r >> 2);
+ dst_argb[3] = -a;
+ dst_argb += 4;
+ src_argb1555 += 2;
+ }
+}
+
+void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb4444[0] & 0x0f;
+ uint8_t g = src_argb4444[0] >> 4;
+ uint8_t r = src_argb4444[1] & 0x0f;
+ uint8_t a = src_argb4444[1] >> 4;
+ dst_argb[0] = (b << 4) | b;
+ dst_argb[1] = (g << 4) | g;
+ dst_argb[2] = (r << 4) | r;
+ dst_argb[3] = (a << 4) | a;
+ dst_argb += 4;
+ src_argb4444 += 2;
+ }
+}
+
+void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint32_t ar30 = *(const uint32_t*)src_ar30;
+ uint32_t b = (ar30 >> 2) & 0xff;
+ uint32_t g = (ar30 >> 12) & 0xff;
+ uint32_t r = (ar30 >> 22) & 0xff;
+ uint32_t a = (ar30 >> 30) * 0x55; // Replicate 2 bits to 8 bits.
+ *(uint32_t*)(dst_argb) = b | (g << 8) | (r << 16) | (a << 24);
+ dst_argb += 4;
+ src_ar30 += 4;
+ }
+}
+
+void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint32_t ar30 = *(const uint32_t*)src_ar30;
+ uint32_t b = (ar30 >> 2) & 0xff;
+ uint32_t g = (ar30 >> 12) & 0xff;
+ uint32_t r = (ar30 >> 22) & 0xff;
+ uint32_t a = (ar30 >> 30) * 0x55; // Replicate 2 bits to 8 bits.
+ *(uint32_t*)(dst_abgr) = r | (g << 8) | (b << 16) | (a << 24);
+ dst_abgr += 4;
+ src_ar30 += 4;
+ }
+}
+
+void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint32_t ar30 = *(const uint32_t*)src_ar30;
+ uint32_t b = ar30 & 0x3ff;
+ uint32_t ga = ar30 & 0xc00ffc00;
+ uint32_t r = (ar30 >> 20) & 0x3ff;
+ *(uint32_t*)(dst_ab30) = r | ga | (b << 20);
+ dst_ab30 += 4;
+ src_ar30 += 4;
+ }
+}
+
+void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb[0];
+ uint8_t g = src_argb[1];
+ uint8_t r = src_argb[2];
+ dst_rgb[0] = b;
+ dst_rgb[1] = g;
+ dst_rgb[2] = r;
+ dst_rgb += 3;
+ src_argb += 4;
+ }
+}
+
+void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb[0];
+ uint8_t g = src_argb[1];
+ uint8_t r = src_argb[2];
+ dst_rgb[0] = r;
+ dst_rgb[1] = g;
+ dst_rgb[2] = b;
+ dst_rgb += 3;
+ src_argb += 4;
+ }
+}
+
+void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ uint8_t b0 = src_argb[0] >> 3;
+ uint8_t g0 = src_argb[1] >> 2;
+ uint8_t r0 = src_argb[2] >> 3;
+ uint8_t b1 = src_argb[4] >> 3;
+ uint8_t g1 = src_argb[5] >> 2;
+ uint8_t r1 = src_argb[6] >> 3;
+ WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
+ (r1 << 27));
+ dst_rgb += 4;
+ src_argb += 8;
+ }
+ if (width & 1) {
+ uint8_t b0 = src_argb[0] >> 3;
+ uint8_t g0 = src_argb[1] >> 2;
+ uint8_t r0 = src_argb[2] >> 3;
+ *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+ }
+}
+
+// dither4 is a row of 4 values from 4x4 dither matrix.
+// The 4x4 matrix contains values to increase RGB. When converting to
+// fewer bits (565) this provides an ordered dither.
+// The order in the 4x4 matrix in first byte is upper left.
+// The 4 values are passed as an int, then referenced as an array, so
+// endian will not affect order of the original matrix. But the dither4
+// will containing the first pixel in the lower byte for little endian
+// or the upper byte for big endian.
+void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ int dither0 = ((const unsigned char*)(&dither4))[x & 3];
+ int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
+ uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
+ uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
+ uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
+ uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
+ uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
+ uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
+ WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
+ (r1 << 27));
+ dst_rgb += 4;
+ src_argb += 8;
+ }
+ if (width & 1) {
+ int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
+ uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
+ uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
+ uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
+ *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+ }
+}
+
+void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ uint8_t b0 = src_argb[0] >> 3;
+ uint8_t g0 = src_argb[1] >> 3;
+ uint8_t r0 = src_argb[2] >> 3;
+ uint8_t a0 = src_argb[3] >> 7;
+ uint8_t b1 = src_argb[4] >> 3;
+ uint8_t g1 = src_argb[5] >> 3;
+ uint8_t r1 = src_argb[6] >> 3;
+ uint8_t a1 = src_argb[7] >> 7;
+ *(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
+ (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
+ dst_rgb += 4;
+ src_argb += 8;
+ }
+ if (width & 1) {
+ uint8_t b0 = src_argb[0] >> 3;
+ uint8_t g0 = src_argb[1] >> 3;
+ uint8_t r0 = src_argb[2] >> 3;
+ uint8_t a0 = src_argb[3] >> 7;
+ *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
+ }
+}
+
+void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ uint8_t b0 = src_argb[0] >> 4;
+ uint8_t g0 = src_argb[1] >> 4;
+ uint8_t r0 = src_argb[2] >> 4;
+ uint8_t a0 = src_argb[3] >> 4;
+ uint8_t b1 = src_argb[4] >> 4;
+ uint8_t g1 = src_argb[5] >> 4;
+ uint8_t r1 = src_argb[6] >> 4;
+ uint8_t a1 = src_argb[7] >> 4;
+ *(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
+ (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
+ dst_rgb += 4;
+ src_argb += 8;
+ }
+ if (width & 1) {
+ uint8_t b0 = src_argb[0] >> 4;
+ uint8_t g0 = src_argb[1] >> 4;
+ uint8_t r0 = src_argb[2] >> 4;
+ uint8_t a0 = src_argb[3] >> 4;
+ *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
+ }
+}
+
+void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
+ uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
+ uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
+ uint32_t a0 = (src_abgr[3] >> 6);
+ *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
+ dst_ar30 += 4;
+ src_abgr += 4;
+ }
+}
+
+void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
+ uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
+ uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
+ uint32_t a0 = (src_argb[3] >> 6);
+ *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
+ dst_ar30 += 4;
+ src_argb += 4;
+ }
+}
+
+static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
+ return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
+}
+
+static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
+ return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
+}
+static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
+ return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
+}
+
+// ARGBToY_C and ARGBToUV_C
+#define MAKEROWY(NAME, R, G, B, BPP) \
+ void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
+ int x; \
+ for (x = 0; x < width; ++x) { \
+ dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
+ src_argb0 += BPP; \
+ dst_y += 1; \
+ } \
+ } \
+ void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
+ uint8_t* dst_u, uint8_t* dst_v, int width) { \
+ const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
+ int x; \
+ for (x = 0; x < width - 1; x += 2) { \
+ uint8_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
+ src_rgb1[B + BPP]) >> \
+ 2; \
+ uint8_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
+ src_rgb1[G + BPP]) >> \
+ 2; \
+ uint8_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
+ src_rgb1[R + BPP]) >> \
+ 2; \
+ dst_u[0] = RGBToU(ar, ag, ab); \
+ dst_v[0] = RGBToV(ar, ag, ab); \
+ src_rgb0 += BPP * 2; \
+ src_rgb1 += BPP * 2; \
+ dst_u += 1; \
+ dst_v += 1; \
+ } \
+ if (width & 1) { \
+ uint8_t ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \
+ uint8_t ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \
+ uint8_t ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \
+ dst_u[0] = RGBToU(ar, ag, ab); \
+ dst_v[0] = RGBToV(ar, ag, ab); \
+ } \
+ }
+
+MAKEROWY(ARGB, 2, 1, 0, 4)
+MAKEROWY(BGRA, 1, 2, 3, 4)
+MAKEROWY(ABGR, 0, 1, 2, 4)
+MAKEROWY(RGBA, 3, 2, 1, 4)
+MAKEROWY(RGB24, 2, 1, 0, 3)
+MAKEROWY(RAW, 0, 1, 2, 3)
+#undef MAKEROWY
+
+// JPeg uses a variation on BT.601-1 full range
+// y = 0.29900 * r + 0.58700 * g + 0.11400 * b
+// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
+// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
+// BT.601 Mpeg range uses:
+// b 0.1016 * 255 = 25.908 = 25
+// g 0.5078 * 255 = 129.489 = 129
+// r 0.2578 * 255 = 65.739 = 66
+// JPeg 8 bit Y (not used):
+// b 0.11400 * 256 = 29.184 = 29
+// g 0.58700 * 256 = 150.272 = 150
+// r 0.29900 * 256 = 76.544 = 77
+// JPeg 7 bit Y:
+// b 0.11400 * 128 = 14.592 = 15
+// g 0.58700 * 128 = 75.136 = 75
+// r 0.29900 * 128 = 38.272 = 38
+// JPeg 8 bit U:
+// b 0.50000 * 255 = 127.5 = 127
+// g -0.33126 * 255 = -84.4713 = -84
+// r -0.16874 * 255 = -43.0287 = -43
+// JPeg 8 bit V:
+// b -0.08131 * 255 = -20.73405 = -20
+// g -0.41869 * 255 = -106.76595 = -107
+// r 0.50000 * 255 = 127.5 = 127
+
+static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
+ return (38 * r + 75 * g + 15 * b + 64) >> 7;
+}
+
+static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
+ return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
+}
+static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
+ return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
+}
+
+#define AVGB(a, b) (((a) + (b) + 1) >> 1)
+
+// ARGBToYJ_C and ARGBToUVJ_C
+#define MAKEROWYJ(NAME, R, G, B, BPP) \
+ void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
+ int x; \
+ for (x = 0; x < width; ++x) { \
+ dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
+ src_argb0 += BPP; \
+ dst_y += 1; \
+ } \
+ } \
+ void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
+ uint8_t* dst_u, uint8_t* dst_v, int width) { \
+ const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
+ int x; \
+ for (x = 0; x < width - 1; x += 2) { \
+ uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
+ AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
+ uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
+ AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
+ uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
+ AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
+ dst_u[0] = RGBToUJ(ar, ag, ab); \
+ dst_v[0] = RGBToVJ(ar, ag, ab); \
+ src_rgb0 += BPP * 2; \
+ src_rgb1 += BPP * 2; \
+ dst_u += 1; \
+ dst_v += 1; \
+ } \
+ if (width & 1) { \
+ uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
+ uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
+ uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
+ dst_u[0] = RGBToUJ(ar, ag, ab); \
+ dst_v[0] = RGBToVJ(ar, ag, ab); \
+ } \
+ }
+
+MAKEROWYJ(ARGB, 2, 1, 0, 4)
+#undef MAKEROWYJ
+
+void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_rgb565[0] & 0x1f;
+ uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+ uint8_t r = src_rgb565[1] >> 3;
+ b = (b << 3) | (b >> 2);
+ g = (g << 2) | (g >> 4);
+ r = (r << 3) | (r >> 2);
+ dst_y[0] = RGBToY(r, g, b);
+ src_rgb565 += 2;
+ dst_y += 1;
+ }
+}
+
+void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb1555[0] & 0x1f;
+ uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+ uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
+ b = (b << 3) | (b >> 2);
+ g = (g << 3) | (g >> 2);
+ r = (r << 3) | (r >> 2);
+ dst_y[0] = RGBToY(r, g, b);
+ src_argb1555 += 2;
+ dst_y += 1;
+ }
+}
+
+void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb4444[0] & 0x0f;
+ uint8_t g = src_argb4444[0] >> 4;
+ uint8_t r = src_argb4444[1] & 0x0f;
+ b = (b << 4) | b;
+ g = (g << 4) | g;
+ r = (r << 4) | r;
+ dst_y[0] = RGBToY(r, g, b);
+ src_argb4444 += 2;
+ dst_y += 1;
+ }
+}
+
+void RGB565ToUVRow_C(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ uint8_t b0 = src_rgb565[0] & 0x1f;
+ uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+ uint8_t r0 = src_rgb565[1] >> 3;
+ uint8_t b1 = src_rgb565[2] & 0x1f;
+ uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
+ uint8_t r1 = src_rgb565[3] >> 3;
+ uint8_t b2 = next_rgb565[0] & 0x1f;
+ uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
+ uint8_t r2 = next_rgb565[1] >> 3;
+ uint8_t b3 = next_rgb565[2] & 0x1f;
+ uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
+ uint8_t r3 = next_rgb565[3] >> 3;
+ uint8_t b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
+ uint8_t g = (g0 + g1 + g2 + g3);
+ uint8_t r = (r0 + r1 + r2 + r3);
+ b = (b << 1) | (b >> 6); // 787 -> 888.
+ r = (r << 1) | (r >> 6);
+ dst_u[0] = RGBToU(r, g, b);
+ dst_v[0] = RGBToV(r, g, b);
+ src_rgb565 += 4;
+ next_rgb565 += 4;
+ dst_u += 1;
+ dst_v += 1;
+ }
+ if (width & 1) {
+ uint8_t b0 = src_rgb565[0] & 0x1f;
+ uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+ uint8_t r0 = src_rgb565[1] >> 3;
+ uint8_t b2 = next_rgb565[0] & 0x1f;
+ uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
+ uint8_t r2 = next_rgb565[1] >> 3;
+ uint8_t b = (b0 + b2); // 565 * 2 = 676.
+ uint8_t g = (g0 + g2);
+ uint8_t r = (r0 + r2);
+ b = (b << 2) | (b >> 4); // 676 -> 888
+ g = (g << 1) | (g >> 6);
+ r = (r << 2) | (r >> 4);
+ dst_u[0] = RGBToU(r, g, b);
+ dst_v[0] = RGBToV(r, g, b);
+ }
+}
+
+void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ uint8_t b0 = src_argb1555[0] & 0x1f;
+ uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+ uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
+ uint8_t b1 = src_argb1555[2] & 0x1f;
+ uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
+ uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
+ uint8_t b2 = next_argb1555[0] & 0x1f;
+ uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
+ uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
+ uint8_t b3 = next_argb1555[2] & 0x1f;
+ uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
+ uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
+ uint8_t b = (b0 + b1 + b2 + b3); // 555 * 4 = 777.
+ uint8_t g = (g0 + g1 + g2 + g3);
+ uint8_t r = (r0 + r1 + r2 + r3);
+ b = (b << 1) | (b >> 6); // 777 -> 888.
+ g = (g << 1) | (g >> 6);
+ r = (r << 1) | (r >> 6);
+ dst_u[0] = RGBToU(r, g, b);
+ dst_v[0] = RGBToV(r, g, b);
+ src_argb1555 += 4;
+ next_argb1555 += 4;
+ dst_u += 1;
+ dst_v += 1;
+ }
+ if (width & 1) {
+ uint8_t b0 = src_argb1555[0] & 0x1f;
+ uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+ uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
+ uint8_t b2 = next_argb1555[0] & 0x1f;
+ uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
+ uint8_t r2 = next_argb1555[1] >> 3;
+ uint8_t b = (b0 + b2); // 555 * 2 = 666.
+ uint8_t g = (g0 + g2);
+ uint8_t r = (r0 + r2);
+ b = (b << 2) | (b >> 4); // 666 -> 888.
+ g = (g << 2) | (g >> 4);
+ r = (r << 2) | (r >> 4);
+ dst_u[0] = RGBToU(r, g, b);
+ dst_v[0] = RGBToV(r, g, b);
+ }
+}
+
+void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ uint8_t b0 = src_argb4444[0] & 0x0f;
+ uint8_t g0 = src_argb4444[0] >> 4;
+ uint8_t r0 = src_argb4444[1] & 0x0f;
+ uint8_t b1 = src_argb4444[2] & 0x0f;
+ uint8_t g1 = src_argb4444[2] >> 4;
+ uint8_t r1 = src_argb4444[3] & 0x0f;
+ uint8_t b2 = next_argb4444[0] & 0x0f;
+ uint8_t g2 = next_argb4444[0] >> 4;
+ uint8_t r2 = next_argb4444[1] & 0x0f;
+ uint8_t b3 = next_argb4444[2] & 0x0f;
+ uint8_t g3 = next_argb4444[2] >> 4;
+ uint8_t r3 = next_argb4444[3] & 0x0f;
+ uint8_t b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
+ uint8_t g = (g0 + g1 + g2 + g3);
+ uint8_t r = (r0 + r1 + r2 + r3);
+ b = (b << 2) | (b >> 4); // 666 -> 888.
+ g = (g << 2) | (g >> 4);
+ r = (r << 2) | (r >> 4);
+ dst_u[0] = RGBToU(r, g, b);
+ dst_v[0] = RGBToV(r, g, b);
+ src_argb4444 += 4;
+ next_argb4444 += 4;
+ dst_u += 1;
+ dst_v += 1;
+ }
+ if (width & 1) {
+ uint8_t b0 = src_argb4444[0] & 0x0f;
+ uint8_t g0 = src_argb4444[0] >> 4;
+ uint8_t r0 = src_argb4444[1] & 0x0f;
+ uint8_t b2 = next_argb4444[0] & 0x0f;
+ uint8_t g2 = next_argb4444[0] >> 4;
+ uint8_t r2 = next_argb4444[1] & 0x0f;
+ uint8_t b = (b0 + b2); // 444 * 2 = 555.
+ uint8_t g = (g0 + g2);
+ uint8_t r = (r0 + r2);
+ b = (b << 3) | (b >> 2); // 555 -> 888.
+ g = (g << 3) | (g >> 2);
+ r = (r << 3) | (r >> 2);
+ dst_u[0] = RGBToU(r, g, b);
+ dst_v[0] = RGBToV(r, g, b);
+ }
+}
+
+void ARGBToUV444Row_C(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t ab = src_argb[0];
+ uint8_t ag = src_argb[1];
+ uint8_t ar = src_argb[2];
+ dst_u[0] = RGBToU(ar, ag, ab);
+ dst_v[0] = RGBToV(ar, ag, ab);
+ src_argb += 4;
+ dst_u += 1;
+ dst_v += 1;
+ }
+}
+
+void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
+ dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
+ dst_argb[3] = src_argb[3];
+ dst_argb += 4;
+ src_argb += 4;
+ }
+}
+
+// Convert a row of image to Sepia tone.
+void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ int b = dst_argb[0];
+ int g = dst_argb[1];
+ int r = dst_argb[2];
+ int sb = (b * 17 + g * 68 + r * 35) >> 7;
+ int sg = (b * 22 + g * 88 + r * 45) >> 7;
+ int sr = (b * 24 + g * 98 + r * 50) >> 7;
+ // b does not over flow. a is preserved from original.
+ dst_argb[0] = sb;
+ dst_argb[1] = clamp255(sg);
+ dst_argb[2] = clamp255(sr);
+ dst_argb += 4;
+ }
+}
+
+// Apply color matrix to a row of image. Matrix is signed.
+// TODO(fbarchard): Consider adding rounding (+32).
+void ARGBColorMatrixRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ int b = src_argb[0];
+ int g = src_argb[1];
+ int r = src_argb[2];
+ int a = src_argb[3];
+ int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] +
+ a * matrix_argb[3]) >>
+ 6;
+ int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] +
+ a * matrix_argb[7]) >>
+ 6;
+ int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] +
+ a * matrix_argb[11]) >>
+ 6;
+ int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
+ a * matrix_argb[15]) >>
+ 6;
+ dst_argb[0] = Clamp(sb);
+ dst_argb[1] = Clamp(sg);
+ dst_argb[2] = Clamp(sr);
+ dst_argb[3] = Clamp(sa);
+ src_argb += 4;
+ dst_argb += 4;
+ }
+}
+
+// Apply color table to a row of image.
+void ARGBColorTableRow_C(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ int b = dst_argb[0];
+ int g = dst_argb[1];
+ int r = dst_argb[2];
+ int a = dst_argb[3];
+ dst_argb[0] = table_argb[b * 4 + 0];
+ dst_argb[1] = table_argb[g * 4 + 1];
+ dst_argb[2] = table_argb[r * 4 + 2];
+ dst_argb[3] = table_argb[a * 4 + 3];
+ dst_argb += 4;
+ }
+}
+
+// Apply color table to a row of image.
+void RGBColorTableRow_C(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ int b = dst_argb[0];
+ int g = dst_argb[1];
+ int r = dst_argb[2];
+ dst_argb[0] = table_argb[b * 4 + 0];
+ dst_argb[1] = table_argb[g * 4 + 1];
+ dst_argb[2] = table_argb[r * 4 + 2];
+ dst_argb += 4;
+ }
+}
+
+void ARGBQuantizeRow_C(uint8_t* dst_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ int b = dst_argb[0];
+ int g = dst_argb[1];
+ int r = dst_argb[2];
+ dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
+ dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
+ dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
+ dst_argb += 4;
+ }
+}
+
+#define REPEAT8(v) (v) | ((v) << 8)
+#define SHADE(f, v) v* f >> 24
+
+void ARGBShadeRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value) {
+ const uint32_t b_scale = REPEAT8(value & 0xff);
+ const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
+ const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
+ const uint32_t a_scale = REPEAT8(value >> 24);
+
+ int i;
+ for (i = 0; i < width; ++i) {
+ const uint32_t b = REPEAT8(src_argb[0]);
+ const uint32_t g = REPEAT8(src_argb[1]);
+ const uint32_t r = REPEAT8(src_argb[2]);
+ const uint32_t a = REPEAT8(src_argb[3]);
+ dst_argb[0] = SHADE(b, b_scale);
+ dst_argb[1] = SHADE(g, g_scale);
+ dst_argb[2] = SHADE(r, r_scale);
+ dst_argb[3] = SHADE(a, a_scale);
+ src_argb += 4;
+ dst_argb += 4;
+ }
+}
+#undef REPEAT8
+#undef SHADE
+
+#define REPEAT8(v) (v) | ((v) << 8)
+#define SHADE(f, v) v* f >> 16
+
+void ARGBMultiplyRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ const uint32_t b = REPEAT8(src_argb0[0]);
+ const uint32_t g = REPEAT8(src_argb0[1]);
+ const uint32_t r = REPEAT8(src_argb0[2]);
+ const uint32_t a = REPEAT8(src_argb0[3]);
+ const uint32_t b_scale = src_argb1[0];
+ const uint32_t g_scale = src_argb1[1];
+ const uint32_t r_scale = src_argb1[2];
+ const uint32_t a_scale = src_argb1[3];
+ dst_argb[0] = SHADE(b, b_scale);
+ dst_argb[1] = SHADE(g, g_scale);
+ dst_argb[2] = SHADE(r, r_scale);
+ dst_argb[3] = SHADE(a, a_scale);
+ src_argb0 += 4;
+ src_argb1 += 4;
+ dst_argb += 4;
+ }
+}
+#undef REPEAT8
+#undef SHADE
+
+#define SHADE(f, v) clamp255(v + f)
+
+void ARGBAddRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ const int b = src_argb0[0];
+ const int g = src_argb0[1];
+ const int r = src_argb0[2];
+ const int a = src_argb0[3];
+ const int b_add = src_argb1[0];
+ const int g_add = src_argb1[1];
+ const int r_add = src_argb1[2];
+ const int a_add = src_argb1[3];
+ dst_argb[0] = SHADE(b, b_add);
+ dst_argb[1] = SHADE(g, g_add);
+ dst_argb[2] = SHADE(r, r_add);
+ dst_argb[3] = SHADE(a, a_add);
+ src_argb0 += 4;
+ src_argb1 += 4;
+ dst_argb += 4;
+ }
+}
+#undef SHADE
+
+#define SHADE(f, v) clamp0(f - v)
+
+void ARGBSubtractRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ const int b = src_argb0[0];
+ const int g = src_argb0[1];
+ const int r = src_argb0[2];
+ const int a = src_argb0[3];
+ const int b_sub = src_argb1[0];
+ const int g_sub = src_argb1[1];
+ const int r_sub = src_argb1[2];
+ const int a_sub = src_argb1[3];
+ dst_argb[0] = SHADE(b, b_sub);
+ dst_argb[1] = SHADE(g, g_sub);
+ dst_argb[2] = SHADE(r, r_sub);
+ dst_argb[3] = SHADE(a, a_sub);
+ src_argb0 += 4;
+ src_argb1 += 4;
+ dst_argb += 4;
+ }
+}
+#undef SHADE
+
+// Sobel functions which mimics SSSE3.
+void SobelXRow_C(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ int a = src_y0[i];
+ int b = src_y1[i];
+ int c = src_y2[i];
+ int a_sub = src_y0[i + 2];
+ int b_sub = src_y1[i + 2];
+ int c_sub = src_y2[i + 2];
+ int a_diff = a - a_sub;
+ int b_diff = b - b_sub;
+ int c_diff = c - c_sub;
+ int sobel = Abs(a_diff + b_diff * 2 + c_diff);
+ dst_sobelx[i] = (uint8_t)(clamp255(sobel));
+ }
+}
+
+void SobelYRow_C(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ int a = src_y0[i + 0];
+ int b = src_y0[i + 1];
+ int c = src_y0[i + 2];
+ int a_sub = src_y1[i + 0];
+ int b_sub = src_y1[i + 1];
+ int c_sub = src_y1[i + 2];
+ int a_diff = a - a_sub;
+ int b_diff = b - b_sub;
+ int c_diff = c - c_sub;
+ int sobel = Abs(a_diff + b_diff * 2 + c_diff);
+ dst_sobely[i] = (uint8_t)(clamp255(sobel));
+ }
+}
+
+void SobelRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ int r = src_sobelx[i];
+ int b = src_sobely[i];
+ int s = clamp255(r + b);
+ dst_argb[0] = (uint8_t)(s);
+ dst_argb[1] = (uint8_t)(s);
+ dst_argb[2] = (uint8_t)(s);
+ dst_argb[3] = (uint8_t)(255u);
+ dst_argb += 4;
+ }
+}
+
+void SobelToPlaneRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ int r = src_sobelx[i];
+ int b = src_sobely[i];
+ int s = clamp255(r + b);
+ dst_y[i] = (uint8_t)(s);
+ }
+}
+
+void SobelXYRow_C(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ int r = src_sobelx[i];
+ int b = src_sobely[i];
+ int g = clamp255(r + b);
+ dst_argb[0] = (uint8_t)(b);
+ dst_argb[1] = (uint8_t)(g);
+ dst_argb[2] = (uint8_t)(r);
+ dst_argb[3] = (uint8_t)(255u);
+ dst_argb += 4;
+ }
+}
+
+void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ // Copy a Y to RGB.
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t y = src_y[0];
+ dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
+ dst_argb[3] = 255u;
+ dst_argb += 4;
+ ++src_y;
+ }
+}
+
+// TODO(fbarchard): Unify these structures to be platform independent.
+// TODO(fbarchard): Generate SIMD structures from float matrix.
+
+// BT.601 YUV to RGB reference
+// R = (Y - 16) * 1.164 - V * -1.596
+// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
+// B = (Y - 16) * 1.164 - U * -2.018
+
+// Y contribution to R,G,B. Scale and bias.
+#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
+#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
+
+// U and V contributions to R,G,B.
+#define UB -128 /* max(-128, round(-2.018 * 64)) */
+#define UG 25 /* round(0.391 * 64) */
+#define VG 52 /* round(0.813 * 64) */
+#define VR -102 /* round(-1.596 * 64) */
+
+// Bias values to subtract 16 from Y and 128 from U and V.
+#define BB (UB * 128 + YGB)
+#define BG (UG * 128 + VG * 128 + YGB)
+#define BR (VR * 128 + YGB)
+
+#if defined(__aarch64__) // 64 bit arm
+const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
+ {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
+ {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
+ {UG, VG, UG, VG, UG, VG, UG, VG},
+ {UG, VG, UG, VG, UG, VG, UG, VG},
+ {BB, BG, BR, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
+ {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
+ {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
+ {VG, UG, VG, UG, VG, UG, VG, UG},
+ {VG, UG, VG, UG, VG, UG, VG, UG},
+ {BR, BG, BB, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+#elif defined(__arm__) // 32 bit arm
+const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
+ {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
+ {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
+ {BB, BG, BR, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
+ {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
+ {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
+ {BR, BG, BB, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+#else
+const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
+ {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
+ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
+ {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
+ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
+ {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
+ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
+ {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
+ {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
+ {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
+ {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
+const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
+ {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
+ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
+ {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
+ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
+ {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
+ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
+ {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
+ {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
+ {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
+ {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
+#endif
+
+#undef BB
+#undef BG
+#undef BR
+#undef YGB
+#undef UB
+#undef UG
+#undef VG
+#undef VR
+#undef YG
+
+// JPEG YUV to RGB reference
+// * R = Y - V * -1.40200
+// * G = Y - U * 0.34414 - V * 0.71414
+// * B = Y - U * -1.77200
+
+// Y contribution to R,G,B. Scale and bias.
+#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
+#define YGB 32 /* 64 / 2 */
+
+// U and V contributions to R,G,B.
+#define UB -113 /* round(-1.77200 * 64) */
+#define UG 22 /* round(0.34414 * 64) */
+#define VG 46 /* round(0.71414 * 64) */
+#define VR -90 /* round(-1.40200 * 64) */
+
+// Bias values to round, and subtract 128 from U and V.
+#define BB (UB * 128 + YGB)
+#define BG (UG * 128 + VG * 128 + YGB)
+#define BR (VR * 128 + YGB)
+
+#if defined(__aarch64__)
+const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
+ {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
+ {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
+ {UG, VG, UG, VG, UG, VG, UG, VG},
+ {UG, VG, UG, VG, UG, VG, UG, VG},
+ {BB, BG, BR, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
+ {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
+ {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
+ {VG, UG, VG, UG, VG, UG, VG, UG},
+ {VG, UG, VG, UG, VG, UG, VG, UG},
+ {BR, BG, BB, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+#elif defined(__arm__)
+const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
+ {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
+ {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
+ {BB, BG, BR, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
+ {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
+ {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
+ {BR, BG, BB, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+#else
+const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
+ {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
+ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
+ {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
+ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
+ {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
+ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
+ {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
+ {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
+ {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
+ {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
+const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
+ {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
+ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
+ {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
+ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
+ {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
+ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
+ {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
+ {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
+ {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
+ {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
+#endif
+
+#undef BB
+#undef BG
+#undef BR
+#undef YGB
+#undef UB
+#undef UG
+#undef VG
+#undef VR
+#undef YG
+
+// BT.709 YUV to RGB reference
+// R = (Y - 16) * 1.164 - V * -1.793
+// G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
+// B = (Y - 16) * 1.164 - U * -2.112
+// See also http://www.equasys.de/colorconversion.html
+
+// Y contribution to R,G,B. Scale and bias.
+#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
+#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
+
+// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
+// U and V contributions to R,G,B.
+#define UB -128 /* max(-128, round(-2.112 * 64)) */
+#define UG 14 /* round(0.213 * 64) */
+#define VG 34 /* round(0.533 * 64) */
+#define VR -115 /* round(-1.793 * 64) */
+
+// Bias values to round, and subtract 128 from U and V.
+#define BB (UB * 128 + YGB)
+#define BG (UG * 128 + VG * 128 + YGB)
+#define BR (VR * 128 + YGB)
+
+#if defined(__aarch64__)
+const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
+ {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
+ {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
+ {UG, VG, UG, VG, UG, VG, UG, VG},
+ {UG, VG, UG, VG, UG, VG, UG, VG},
+ {BB, BG, BR, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
+ {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
+ {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
+ {VG, UG, VG, UG, VG, UG, VG, UG},
+ {VG, UG, VG, UG, VG, UG, VG, UG},
+ {BR, BG, BB, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+#elif defined(__arm__)
+const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
+ {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
+ {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
+ {BB, BG, BR, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
+ {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
+ {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
+ {BR, BG, BB, 0, 0, 0, 0, 0},
+ {0x0101 * YG, 0, 0, 0}};
+#else
+const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
+ {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
+ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
+ {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
+ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
+ {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
+ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
+ {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
+ {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
+ {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
+ {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
+const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
+ {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
+ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
+ {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
+ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
+ {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
+ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
+ {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
+ {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
+ {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
+ {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
+#endif
+
+#undef BB
+#undef BG
+#undef BR
+#undef YGB
+#undef UB
+#undef UG
+#undef VG
+#undef VR
+#undef YG
+
+// C reference code that mimics the YUV assembly.
+// Reads 8 bit YUV and leaves result as 16 bit.
+
+static __inline void YuvPixel(uint8_t y,
+ uint8_t u,
+ uint8_t v,
+ uint8_t* b,
+ uint8_t* g,
+ uint8_t* r,
+ const struct YuvConstants* yuvconstants) {
+#if defined(__aarch64__)
+ int ub = -yuvconstants->kUVToRB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = -yuvconstants->kUVToRB[1];
+ int bb = yuvconstants->kUVBiasBGR[0];
+ int bg = yuvconstants->kUVBiasBGR[1];
+ int br = yuvconstants->kUVBiasBGR[2];
+ int yg = yuvconstants->kYToRgb[0] / 0x0101;
+#elif defined(__arm__)
+ int ub = -yuvconstants->kUVToRB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[4];
+ int vr = -yuvconstants->kUVToRB[4];
+ int bb = yuvconstants->kUVBiasBGR[0];
+ int bg = yuvconstants->kUVBiasBGR[1];
+ int br = yuvconstants->kUVBiasBGR[2];
+ int yg = yuvconstants->kYToRgb[0] / 0x0101;
+#else
+ int ub = yuvconstants->kUVToB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = yuvconstants->kUVToR[1];
+ int bb = yuvconstants->kUVBiasB[0];
+ int bg = yuvconstants->kUVBiasG[0];
+ int br = yuvconstants->kUVBiasR[0];
+ int yg = yuvconstants->kYToRgb[0];
+#endif
+
+ uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
+ *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6);
+ *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6);
+ *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6);
+}
+
+// Reads 8 bit YUV and leaves result as 16 bit.
+static __inline void YuvPixel8_16(uint8_t y,
+ uint8_t u,
+ uint8_t v,
+ int* b,
+ int* g,
+ int* r,
+ const struct YuvConstants* yuvconstants) {
+#if defined(__aarch64__)
+ int ub = -yuvconstants->kUVToRB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = -yuvconstants->kUVToRB[1];
+ int bb = yuvconstants->kUVBiasBGR[0];
+ int bg = yuvconstants->kUVBiasBGR[1];
+ int br = yuvconstants->kUVBiasBGR[2];
+ int yg = yuvconstants->kYToRgb[0] / 0x0101;
+#elif defined(__arm__)
+ int ub = -yuvconstants->kUVToRB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[4];
+ int vr = -yuvconstants->kUVToRB[4];
+ int bb = yuvconstants->kUVBiasBGR[0];
+ int bg = yuvconstants->kUVBiasBGR[1];
+ int br = yuvconstants->kUVBiasBGR[2];
+ int yg = yuvconstants->kYToRgb[0] / 0x0101;
+#else
+ int ub = yuvconstants->kUVToB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = yuvconstants->kUVToR[1];
+ int bb = yuvconstants->kUVBiasB[0];
+ int bg = yuvconstants->kUVBiasG[0];
+ int br = yuvconstants->kUVBiasR[0];
+ int yg = yuvconstants->kYToRgb[0];
+#endif
+
+ uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
+ *b = (int)(-(u * ub) + y1 + bb);
+ *g = (int)(-(u * ug + v * vg) + y1 + bg);
+ *r = (int)(-(v * vr) + y1 + br);
+}
+
+// C reference code that mimics the YUV 16 bit assembly.
+// Reads 10 bit YUV and leaves result as 16 bit.
+static __inline void YuvPixel16(int16_t y,
+ int16_t u,
+ int16_t v,
+ int* b,
+ int* g,
+ int* r,
+ const struct YuvConstants* yuvconstants) {
+#if defined(__aarch64__)
+ int ub = -yuvconstants->kUVToRB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = -yuvconstants->kUVToRB[1];
+ int bb = yuvconstants->kUVBiasBGR[0];
+ int bg = yuvconstants->kUVBiasBGR[1];
+ int br = yuvconstants->kUVBiasBGR[2];
+ int yg = yuvconstants->kYToRgb[0] / 0x0101;
+#elif defined(__arm__)
+ int ub = -yuvconstants->kUVToRB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[4];
+ int vr = -yuvconstants->kUVToRB[4];
+ int bb = yuvconstants->kUVBiasBGR[0];
+ int bg = yuvconstants->kUVBiasBGR[1];
+ int br = yuvconstants->kUVBiasBGR[2];
+ int yg = yuvconstants->kYToRgb[0] / 0x0101;
+#else
+ int ub = yuvconstants->kUVToB[0];
+ int ug = yuvconstants->kUVToG[0];
+ int vg = yuvconstants->kUVToG[1];
+ int vr = yuvconstants->kUVToR[1];
+ int bb = yuvconstants->kUVBiasB[0];
+ int bg = yuvconstants->kUVBiasG[0];
+ int br = yuvconstants->kUVBiasR[0];
+ int yg = yuvconstants->kYToRgb[0];
+#endif
+
+ uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
+ u = clamp255(u >> 2);
+ v = clamp255(v >> 2);
+ *b = (int)(-(u * ub) + y1 + bb);
+ *g = (int)(-(u * ug + v * vg) + y1 + bg);
+ *r = (int)(-(v * vr) + y1 + br);
+}
+
+// C reference code that mimics the YUV 10 bit assembly.
+// Reads 10 bit YUV and clamps down to 8 bit RGB.
+static __inline void YuvPixel10(uint16_t y,
+ uint16_t u,
+ uint16_t v,
+ uint8_t* b,
+ uint8_t* g,
+ uint8_t* r,
+ const struct YuvConstants* yuvconstants) {
+ int b16;
+ int g16;
+ int r16;
+ YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants);
+ *b = Clamp(b16 >> 6);
+ *g = Clamp(g16 >> 6);
+ *r = Clamp(r16 >> 6);
+}
+
+// Y contribution to R,G,B. Scale and bias.
+#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
+#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
+
+// C reference code that mimics the YUV assembly.
+static __inline void YPixel(uint8_t y, uint8_t* b, uint8_t* g, uint8_t* r) {
+ uint32_t y1 = (uint32_t)(y * 0x0101 * YG) >> 16;
+ *b = Clamp((int32_t)(y1 + YGB) >> 6);
+ *g = Clamp((int32_t)(y1 + YGB) >> 6);
+ *r = Clamp((int32_t)(y1 + YGB) >> 6);
+}
+
+#undef YG
+#undef YGB
+
+#if !defined(LIBYUV_DISABLE_NEON) && \
+ (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
+// C mimic assembly.
+// TODO(fbarchard): Remove subsampling from Neon.
+void I444ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
+ uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
+ YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
+ yuvconstants);
+ rgb_buf[3] = 255;
+ YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
+ yuvconstants);
+ rgb_buf[7] = 255;
+ src_y += 2;
+ src_u += 2;
+ src_v += 2;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ }
+}
+#else
+void I444ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ src_y += 1;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 4; // Advance 1 pixel.
+ }
+}
+#endif
+
+// Also used for 420
+void I422ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
+ rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = 255;
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ }
+}
+
+// 10 bit YUV to ARGB
+void I210ToARGBRow_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
+ rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = 255;
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ }
+}
+
+static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
+ uint32_t ar30;
+ b = b >> 4; // convert 10.6 to 10 bit.
+ g = g >> 4;
+ r = r >> 4;
+ b = Clamp10(b);
+ g = Clamp10(g);
+ r = Clamp10(r);
+ ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
+ (*(uint32_t*)rgb_buf) = ar30;
+}
+
+// 10 bit YUV to 10 bit AR30
+void I210ToAR30Row_C(const uint16_t* src_y,
+ const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int b;
+ int g;
+ int r;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
+ YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf + 4, b, g, r);
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
+ }
+}
+
+// 8 bit YUV to 10 bit AR30
+// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
+void I422ToAR30Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int b;
+ int g;
+ int r;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
+ YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf + 4, b, g, r);
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
+ StoreAR30(rgb_buf, b, g, r);
+ }
+}
+
+void I422AlphaToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = src_a[0];
+ YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
+ rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = src_a[1];
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ src_a += 2;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = src_a[0];
+ }
+}
+
+void I422ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
+ rgb_buf + 5, yuvconstants);
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 6; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ }
+}
+
+void I422ToARGB4444Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ uint8_t b0;
+ uint8_t g0;
+ uint8_t r0;
+ uint8_t b1;
+ uint8_t g1;
+ uint8_t r1;
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
+ b0 = b0 >> 4;
+ g0 = g0 >> 4;
+ r0 = r0 >> 4;
+ b1 = b1 >> 4;
+ g1 = g1 >> 4;
+ r1 = r1 >> 4;
+ *(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) |
+ (g1 << 20) | (r1 << 24) | 0xf000f000;
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ dst_argb4444 += 4; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ b0 = b0 >> 4;
+ g0 = g0 >> 4;
+ r0 = r0 >> 4;
+ *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
+ }
+}
+
+void I422ToARGB1555Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ uint8_t b0;
+ uint8_t g0;
+ uint8_t r0;
+ uint8_t b1;
+ uint8_t g1;
+ uint8_t r1;
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
+ b0 = b0 >> 3;
+ g0 = g0 >> 3;
+ r0 = r0 >> 3;
+ b1 = b1 >> 3;
+ g1 = g1 >> 3;
+ r1 = r1 >> 3;
+ *(uint32_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) |
+ (g1 << 21) | (r1 << 26) | 0x80008000;
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ dst_argb1555 += 4; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ b0 = b0 >> 3;
+ g0 = g0 >> 3;
+ r0 = r0 >> 3;
+ *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
+ }
+}
+
+void I422ToRGB565Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ uint8_t b0;
+ uint8_t g0;
+ uint8_t r0;
+ uint8_t b1;
+ uint8_t g1;
+ uint8_t r1;
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
+ b0 = b0 >> 3;
+ g0 = g0 >> 2;
+ r0 = r0 >> 3;
+ b1 = b1 >> 3;
+ g1 = g1 >> 2;
+ r1 = r1 >> 3;
+ *(uint32_t*)(dst_rgb565) =
+ b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ dst_rgb565 += 4; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ b0 = b0 >> 3;
+ g0 = g0 >> 2;
+ r0 = r0 >> 3;
+ *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+ }
+}
+
+void NV12ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
+ rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = 255;
+ src_y += 2;
+ src_uv += 2;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ }
+}
+
+void NV21ToARGBRow_C(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
+ rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = 255;
+ src_y += 2;
+ src_vu += 2;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ }
+}
+
+void NV12ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
+ rgb_buf + 5, yuvconstants);
+ src_y += 2;
+ src_uv += 2;
+ rgb_buf += 6; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ }
+}
+
+void NV21ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
+ rgb_buf + 5, yuvconstants);
+ src_y += 2;
+ src_vu += 2;
+ rgb_buf += 6; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ }
+}
+
+void NV12ToRGB565Row_C(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ uint8_t b0;
+ uint8_t g0;
+ uint8_t r0;
+ uint8_t b1;
+ uint8_t g1;
+ uint8_t r1;
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
+ b0 = b0 >> 3;
+ g0 = g0 >> 2;
+ r0 = r0 >> 3;
+ b1 = b1 >> 3;
+ g1 = g1 >> 2;
+ r1 = r1 >> 3;
+ *(uint32_t*)(dst_rgb565) =
+ b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
+ src_y += 2;
+ src_uv += 2;
+ dst_rgb565 += 4; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
+ b0 = b0 >> 3;
+ g0 = g0 >> 2;
+ r0 = r0 >> 3;
+ *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+ }
+}
+
+void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
+ rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = 255;
+ src_yuy2 += 4;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ }
+}
+
+void UYVYToARGBRow_C(const uint8_t* src_uyvy,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
+ rgb_buf + 6, yuvconstants);
+ rgb_buf[7] = 255;
+ src_uyvy += 4;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ rgb_buf[3] = 255;
+ }
+}
+
+void I422ToRGBARow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
+ rgb_buf + 3, yuvconstants);
+ rgb_buf[0] = 255;
+ YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
+ rgb_buf + 7, yuvconstants);
+ rgb_buf[4] = 255;
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
+ rgb_buf + 3, yuvconstants);
+ rgb_buf[0] = 255;
+ }
+}
+
+void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf[3] = 255;
+ YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf[7] = 255;
+ src_y += 2;
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ if (width & 1) {
+ YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf[3] = 255;
+ }
+}
+
+void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
+ int x;
+ src += width - 1;
+ for (x = 0; x < width - 1; x += 2) {
+ dst[x] = src[0];
+ dst[x + 1] = src[-1];
+ src -= 2;
+ }
+ if (width & 1) {
+ dst[width - 1] = src[0];
+ }
+}
+
+void MirrorUVRow_C(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ src_uv += (width - 1) << 1;
+ for (x = 0; x < width - 1; x += 2) {
+ dst_u[x] = src_uv[0];
+ dst_u[x + 1] = src_uv[-2];
+ dst_v[x] = src_uv[1];
+ dst_v[x + 1] = src_uv[-2 + 1];
+ src_uv -= 4;
+ }
+ if (width & 1) {
+ dst_u[width - 1] = src_uv[0];
+ dst_v[width - 1] = src_uv[1];
+ }
+}
+
+void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
+ int x;
+ const uint32_t* src32 = (const uint32_t*)(src);
+ uint32_t* dst32 = (uint32_t*)(dst);
+ src32 += width - 1;
+ for (x = 0; x < width - 1; x += 2) {
+ dst32[x] = src32[0];
+ dst32[x + 1] = src32[-1];
+ src32 -= 2;
+ }
+ if (width & 1) {
+ dst32[width - 1] = src32[0];
+ }
+}
+
+void SplitUVRow_C(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ dst_u[x] = src_uv[0];
+ dst_u[x + 1] = src_uv[2];
+ dst_v[x] = src_uv[1];
+ dst_v[x + 1] = src_uv[3];
+ src_uv += 4;
+ }
+ if (width & 1) {
+ dst_u[width - 1] = src_uv[0];
+ dst_v[width - 1] = src_uv[1];
+ }
+}
+
+void MergeUVRow_C(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ dst_uv[0] = src_u[x];
+ dst_uv[1] = src_v[x];
+ dst_uv[2] = src_u[x + 1];
+ dst_uv[3] = src_v[x + 1];
+ dst_uv += 4;
+ }
+ if (width & 1) {
+ dst_uv[0] = src_u[width - 1];
+ dst_uv[1] = src_v[width - 1];
+ }
+}
+
+void SplitRGBRow_C(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_r[x] = src_rgb[0];
+ dst_g[x] = src_rgb[1];
+ dst_b[x] = src_rgb[2];
+ src_rgb += 3;
+ }
+}
+
+void MergeRGBRow_C(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_rgb[0] = src_r[x];
+ dst_rgb[1] = src_g[x];
+ dst_rgb[2] = src_b[x];
+ dst_rgb += 3;
+ }
+}
+
+// Use scale to convert lsb formats to msb, depending how many bits there are:
+// 128 = 9 bits
+// 64 = 10 bits
+// 16 = 12 bits
+// 1 = 16 bits
+void MergeUVRow_16_C(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
+ int scale,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ dst_uv[0] = src_u[x] * scale;
+ dst_uv[1] = src_v[x] * scale;
+ dst_uv[2] = src_u[x + 1] * scale;
+ dst_uv[3] = src_v[x + 1] * scale;
+ dst_uv += 4;
+ }
+ if (width & 1) {
+ dst_uv[0] = src_u[width - 1] * scale;
+ dst_uv[1] = src_v[width - 1] * scale;
+ }
+}
+
+void MultiplyRow_16_C(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_y[x] = src_y[x] * scale;
+ }
+}
+
+// Use scale to convert lsb formats to msb, depending how many bits there are:
+// 32768 = 9 bits
+// 16384 = 10 bits
+// 4096 = 12 bits
+// 256 = 16 bits
+void Convert16To8Row_C(const uint16_t* src_y,
+ uint8_t* dst_y,
+ int scale,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_y[x] = clamp255((src_y[x] * scale) >> 16);
+ }
+}
+
+// Use scale to convert lsb formats to msb, depending how many bits there are:
+// 1024 = 10 bits
+void Convert8To16Row_C(const uint8_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ int x;
+ scale *= 0x0101; // replicates the byte.
+ for (x = 0; x < width; ++x) {
+ dst_y[x] = (src_y[x] * scale) >> 16;
+ }
+}
+
+void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
+ memcpy(dst, src, count);
+}
+
+void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
+ memcpy(dst, src, count * 2);
+}
+
+void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
+ memset(dst, v8, width);
+}
+
+void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
+ uint32_t* d = (uint32_t*)(dst_argb);
+ int x;
+ for (x = 0; x < width; ++x) {
+ d[x] = v32;
+ }
+}
+
+// Filter 2 rows of YUY2 UV's (422) into U and V (420).
+void YUY2ToUVRow_C(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ // Output a row of UV values, filtering 2 rows of YUY2.
+ int x;
+ for (x = 0; x < width; x += 2) {
+ dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
+ dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
+ src_yuy2 += 4;
+ dst_u += 1;
+ dst_v += 1;
+ }
+}
+
+// Copy row of YUY2 UV's (422) into U and V (422).
+void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ // Output a row of UV values.
+ int x;
+ for (x = 0; x < width; x += 2) {
+ dst_u[0] = src_yuy2[1];
+ dst_v[0] = src_yuy2[3];
+ src_yuy2 += 4;
+ dst_u += 1;
+ dst_v += 1;
+ }
+}
+
+// Copy row of YUY2 Y's (422) into Y (420/422).
+void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+ // Output a row of Y values.
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ dst_y[x] = src_yuy2[0];
+ dst_y[x + 1] = src_yuy2[2];
+ src_yuy2 += 4;
+ }
+ if (width & 1) {
+ dst_y[width - 1] = src_yuy2[0];
+ }
+}
+
+// Filter 2 rows of UYVY UV's (422) into U and V (420).
+void UYVYToUVRow_C(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ // Output a row of UV values.
+ int x;
+ for (x = 0; x < width; x += 2) {
+ dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
+ dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
+ src_uyvy += 4;
+ dst_u += 1;
+ dst_v += 1;
+ }
+}
+
+// Copy row of UYVY UV's (422) into U and V (422).
+void UYVYToUV422Row_C(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ // Output a row of UV values.
+ int x;
+ for (x = 0; x < width; x += 2) {
+ dst_u[0] = src_uyvy[0];
+ dst_v[0] = src_uyvy[2];
+ src_uyvy += 4;
+ dst_u += 1;
+ dst_v += 1;
+ }
+}
+
+// Copy row of UYVY Y's (422) into Y (420/422).
+void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+ // Output a row of Y values.
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ dst_y[x] = src_uyvy[1];
+ dst_y[x + 1] = src_uyvy[3];
+ src_uyvy += 4;
+ }
+ if (width & 1) {
+ dst_y[width - 1] = src_uyvy[1];
+ }
+}
+
+#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
+
+// Blend src_argb0 over src_argb1 and store to dst_argb.
+// dst_argb may be src_argb0 or src_argb1.
+// This code mimics the SSSE3 version for better testability.
+void ARGBBlendRow_C(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ uint32_t fb = src_argb0[0];
+ uint32_t fg = src_argb0[1];
+ uint32_t fr = src_argb0[2];
+ uint32_t a = src_argb0[3];
+ uint32_t bb = src_argb1[0];
+ uint32_t bg = src_argb1[1];
+ uint32_t br = src_argb1[2];
+ dst_argb[0] = BLEND(fb, bb, a);
+ dst_argb[1] = BLEND(fg, bg, a);
+ dst_argb[2] = BLEND(fr, br, a);
+ dst_argb[3] = 255u;
+
+ fb = src_argb0[4 + 0];
+ fg = src_argb0[4 + 1];
+ fr = src_argb0[4 + 2];
+ a = src_argb0[4 + 3];
+ bb = src_argb1[4 + 0];
+ bg = src_argb1[4 + 1];
+ br = src_argb1[4 + 2];
+ dst_argb[4 + 0] = BLEND(fb, bb, a);
+ dst_argb[4 + 1] = BLEND(fg, bg, a);
+ dst_argb[4 + 2] = BLEND(fr, br, a);
+ dst_argb[4 + 3] = 255u;
+ src_argb0 += 8;
+ src_argb1 += 8;
+ dst_argb += 8;
+ }
+
+ if (width & 1) {
+ uint32_t fb = src_argb0[0];
+ uint32_t fg = src_argb0[1];
+ uint32_t fr = src_argb0[2];
+ uint32_t a = src_argb0[3];
+ uint32_t bb = src_argb1[0];
+ uint32_t bg = src_argb1[1];
+ uint32_t br = src_argb1[2];
+ dst_argb[0] = BLEND(fb, bb, a);
+ dst_argb[1] = BLEND(fg, bg, a);
+ dst_argb[2] = BLEND(fr, br, a);
+ dst_argb[3] = 255u;
+ }
+}
+#undef BLEND
+
+#define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8
+void BlendPlaneRow_C(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
+ dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
+ src0 += 2;
+ src1 += 2;
+ alpha += 2;
+ dst += 2;
+ }
+ if (width & 1) {
+ dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
+ }
+}
+#undef UBLEND
+
+#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
+
+// Multiply source RGB by alpha and store to destination.
+// This code mimics the SSSE3 version for better testability.
+void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
+ int i;
+ for (i = 0; i < width - 1; i += 2) {
+ uint32_t b = src_argb[0];
+ uint32_t g = src_argb[1];
+ uint32_t r = src_argb[2];
+ uint32_t a = src_argb[3];
+ dst_argb[0] = ATTENUATE(b, a);
+ dst_argb[1] = ATTENUATE(g, a);
+ dst_argb[2] = ATTENUATE(r, a);
+ dst_argb[3] = a;
+ b = src_argb[4];
+ g = src_argb[5];
+ r = src_argb[6];
+ a = src_argb[7];
+ dst_argb[4] = ATTENUATE(b, a);
+ dst_argb[5] = ATTENUATE(g, a);
+ dst_argb[6] = ATTENUATE(r, a);
+ dst_argb[7] = a;
+ src_argb += 8;
+ dst_argb += 8;
+ }
+
+ if (width & 1) {
+ const uint32_t b = src_argb[0];
+ const uint32_t g = src_argb[1];
+ const uint32_t r = src_argb[2];
+ const uint32_t a = src_argb[3];
+ dst_argb[0] = ATTENUATE(b, a);
+ dst_argb[1] = ATTENUATE(g, a);
+ dst_argb[2] = ATTENUATE(r, a);
+ dst_argb[3] = a;
+ }
+}
+#undef ATTENUATE
+
+// Divide source RGB by alpha and store to destination.
+// b = (b * 255 + (a / 2)) / a;
+// g = (g * 255 + (a / 2)) / a;
+// r = (r * 255 + (a / 2)) / a;
+// Reciprocal method is off by 1 on some values. ie 125
+// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
+#define T(a) 0x01000000 + (0x10000 / a)
+const uint32_t fixed_invtbl8[256] = {
+ 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
+ T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
+ T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
+ T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
+ T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
+ T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
+ T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
+ T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
+ T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
+ T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
+ T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
+ T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
+ T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
+ T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
+ T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
+ T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
+ T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
+ T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
+ T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
+ T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
+ T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
+ T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
+ T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
+ T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
+ T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
+ T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
+ T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
+ T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
+ T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
+ T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
+ T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
+ T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
+ T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
+ T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
+ T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
+ T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
+ T(0xfc), T(0xfd), T(0xfe), 0x01000100};
+#undef T
+
+void ARGBUnattenuateRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ uint32_t b = src_argb[0];
+ uint32_t g = src_argb[1];
+ uint32_t r = src_argb[2];
+ const uint32_t a = src_argb[3];
+ const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
+ b = (b * ia) >> 8;
+ g = (g * ia) >> 8;
+ r = (r * ia) >> 8;
+ // Clamping should not be necessary but is free in assembly.
+ dst_argb[0] = clamp255(b);
+ dst_argb[1] = clamp255(g);
+ dst_argb[2] = clamp255(r);
+ dst_argb[3] = a;
+ src_argb += 4;
+ dst_argb += 4;
+ }
+}
+
+void ComputeCumulativeSumRow_C(const uint8_t* row,
+ int32_t* cumsum,
+ const int32_t* previous_cumsum,
+ int width) {
+ int32_t row_sum[4] = {0, 0, 0, 0};
+ int x;
+ for (x = 0; x < width; ++x) {
+ row_sum[0] += row[x * 4 + 0];
+ row_sum[1] += row[x * 4 + 1];
+ row_sum[2] += row[x * 4 + 2];
+ row_sum[3] += row[x * 4 + 3];
+ cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
+ cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
+ cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
+ cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
+ }
+}
+
+void CumulativeSumToAverageRow_C(const int32_t* tl,
+ const int32_t* bl,
+ int w,
+ int area,
+ uint8_t* dst,
+ int count) {
+ float ooa = 1.0f / area;
+ int i;
+ for (i = 0; i < count; ++i) {
+ dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
+ dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
+ dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
+ dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
+ dst += 4;
+ tl += 4;
+ bl += 4;
+ }
+}
+
+// Copy pixels from rotated source to destination row with a slope.
+LIBYUV_API
+void ARGBAffineRow_C(const uint8_t* src_argb,
+ int src_argb_stride,
+ uint8_t* dst_argb,
+ const float* uv_dudv,
+ int width) {
+ int i;
+ // Render a row of pixels from source into a buffer.
+ float uv[2];
+ uv[0] = uv_dudv[0];
+ uv[1] = uv_dudv[1];
+ for (i = 0; i < width; ++i) {
+ int x = (int)(uv[0]);
+ int y = (int)(uv[1]);
+ *(uint32_t*)(dst_argb) =
+ *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4);
+ dst_argb += 4;
+ uv[0] += uv_dudv[2];
+ uv[1] += uv_dudv[3];
+ }
+}
+
+// Blend 2 rows into 1.
+static void HalfRow_C(const uint8_t* src_uv,
+ ptrdiff_t src_uv_stride,
+ uint8_t* dst_uv,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
+ }
+}
+
+static void HalfRow_16_C(const uint16_t* src_uv,
+ ptrdiff_t src_uv_stride,
+ uint16_t* dst_uv,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
+ }
+}
+
+// C version 2x2 -> 2x1.
+void InterpolateRow_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int width,
+ int source_y_fraction) {
+ int y1_fraction = source_y_fraction;
+ int y0_fraction = 256 - y1_fraction;
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
+ int x;
+ if (y1_fraction == 0) {
+ memcpy(dst_ptr, src_ptr, width);
+ return;
+ }
+ if (y1_fraction == 128) {
+ HalfRow_C(src_ptr, src_stride, dst_ptr, width);
+ return;
+ }
+ for (x = 0; x < width - 1; x += 2) {
+ dst_ptr[0] =
+ (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+ dst_ptr[1] =
+ (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
+ src_ptr += 2;
+ src_ptr1 += 2;
+ dst_ptr += 2;
+ }
+ if (width & 1) {
+ dst_ptr[0] =
+ (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+ }
+}
+
+void InterpolateRow_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ int width,
+ int source_y_fraction) {
+ int y1_fraction = source_y_fraction;
+ int y0_fraction = 256 - y1_fraction;
+ const uint16_t* src_ptr1 = src_ptr + src_stride;
+ int x;
+ if (source_y_fraction == 0) {
+ memcpy(dst_ptr, src_ptr, width * 2);
+ return;
+ }
+ if (source_y_fraction == 128) {
+ HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
+ return;
+ }
+ for (x = 0; x < width - 1; x += 2) {
+ dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
+ dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
+ src_ptr += 2;
+ src_ptr1 += 2;
+ dst_ptr += 2;
+ }
+ if (width & 1) {
+ dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
+ }
+}
+
+// Use first 4 shuffler values to reorder ARGB channels.
+void ARGBShuffleRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width) {
+ int index0 = shuffler[0];
+ int index1 = shuffler[1];
+ int index2 = shuffler[2];
+ int index3 = shuffler[3];
+ // Shuffle a row of ARGB.
+ int x;
+ for (x = 0; x < width; ++x) {
+ // To support in-place conversion.
+ uint8_t b = src_argb[index0];
+ uint8_t g = src_argb[index1];
+ uint8_t r = src_argb[index2];
+ uint8_t a = src_argb[index3];
+ dst_argb[0] = b;
+ dst_argb[1] = g;
+ dst_argb[2] = r;
+ dst_argb[3] = a;
+ src_argb += 4;
+ dst_argb += 4;
+ }
+}
+
+void I422ToYUY2Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ dst_frame[0] = src_y[0];
+ dst_frame[1] = src_u[0];
+ dst_frame[2] = src_y[1];
+ dst_frame[3] = src_v[0];
+ dst_frame += 4;
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ }
+ if (width & 1) {
+ dst_frame[0] = src_y[0];
+ dst_frame[1] = src_u[0];
+ dst_frame[2] = 0;
+ dst_frame[3] = src_v[0];
+ }
+}
+
+void I422ToUYVYRow_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
+ int width) {
+ int x;
+ for (x = 0; x < width - 1; x += 2) {
+ dst_frame[0] = src_u[0];
+ dst_frame[1] = src_y[0];
+ dst_frame[2] = src_v[0];
+ dst_frame[3] = src_y[1];
+ dst_frame += 4;
+ src_y += 2;
+ src_u += 1;
+ src_v += 1;
+ }
+ if (width & 1) {
+ dst_frame[0] = src_u[0];
+ dst_frame[1] = src_y[0];
+ dst_frame[2] = src_v[0];
+ dst_frame[3] = 0;
+ }
+}
+
+void ARGBPolynomialRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const float* poly,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ float b = (float)(src_argb[0]);
+ float g = (float)(src_argb[1]);
+ float r = (float)(src_argb[2]);
+ float a = (float)(src_argb[3]);
+ float b2 = b * b;
+ float g2 = g * g;
+ float r2 = r * r;
+ float a2 = a * a;
+ float db = poly[0] + poly[4] * b;
+ float dg = poly[1] + poly[5] * g;
+ float dr = poly[2] + poly[6] * r;
+ float da = poly[3] + poly[7] * a;
+ float b3 = b2 * b;
+ float g3 = g2 * g;
+ float r3 = r2 * r;
+ float a3 = a2 * a;
+ db += poly[8] * b2;
+ dg += poly[9] * g2;
+ dr += poly[10] * r2;
+ da += poly[11] * a2;
+ db += poly[12] * b3;
+ dg += poly[13] * g3;
+ dr += poly[14] * r3;
+ da += poly[15] * a3;
+
+ dst_argb[0] = Clamp((int32_t)(db));
+ dst_argb[1] = Clamp((int32_t)(dg));
+ dst_argb[2] = Clamp((int32_t)(dr));
+ dst_argb[3] = Clamp((int32_t)(da));
+ src_argb += 4;
+ dst_argb += 4;
+ }
+}
+
+// Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor
+// adjust the source integer range to the half float range desired.
+
+// This magic constant is 2^-112. Multiplying by this
+// is the same as subtracting 112 from the exponent, which
+// is the difference in exponent bias between 32-bit and
+// 16-bit floats. Once we've done this subtraction, we can
+// simply extract the low bits of the exponent and the high
+// bits of the mantissa from our float and we're done.
+
+// Work around GCC 7 punning warning -Wstrict-aliasing
+#if defined(__GNUC__)
+typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
+#else
+typedef uint32_t uint32_alias_t;
+#endif
+
+void HalfFloatRow_C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ int i;
+ float mult = 1.9259299444e-34f * scale;
+ for (i = 0; i < width; ++i) {
+ float value = src[i] * mult;
+ dst[i] = (uint16_t)((*(const uint32_alias_t*)&value) >> 13);
+ }
+}
+
+void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ float value = src[i] * scale;
+ dst[i] = value;
+ }
+}
+
+void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ const uint8_t* luma,
+ uint32_t lumacoeff) {
+ uint32_t bc = lumacoeff & 0xff;
+ uint32_t gc = (lumacoeff >> 8) & 0xff;
+ uint32_t rc = (lumacoeff >> 16) & 0xff;
+
+ int i;
+ for (i = 0; i < width - 1; i += 2) {
+ // Luminance in rows, color values in columns.
+ const uint8_t* luma0 =
+ ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
+ luma;
+ const uint8_t* luma1;
+ dst_argb[0] = luma0[src_argb[0]];
+ dst_argb[1] = luma0[src_argb[1]];
+ dst_argb[2] = luma0[src_argb[2]];
+ dst_argb[3] = src_argb[3];
+ luma1 =
+ ((src_argb[4] * bc + src_argb[5] * gc + src_argb[6] * rc) & 0x7F00u) +
+ luma;
+ dst_argb[4] = luma1[src_argb[4]];
+ dst_argb[5] = luma1[src_argb[5]];
+ dst_argb[6] = luma1[src_argb[6]];
+ dst_argb[7] = src_argb[7];
+ src_argb += 8;
+ dst_argb += 8;
+ }
+ if (width & 1) {
+ // Luminance in rows, color values in columns.
+ const uint8_t* luma0 =
+ ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
+ luma;
+ dst_argb[0] = luma0[src_argb[0]];
+ dst_argb[1] = luma0[src_argb[1]];
+ dst_argb[2] = luma0[src_argb[2]];
+ dst_argb[3] = src_argb[3];
+ }
+}
+
+void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
+ int i;
+ for (i = 0; i < width - 1; i += 2) {
+ dst[3] = src[3];
+ dst[7] = src[7];
+ dst += 8;
+ src += 8;
+ }
+ if (width & 1) {
+ dst[3] = src[3];
+ }
+}
+
+void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
+ int i;
+ for (i = 0; i < width - 1; i += 2) {
+ dst_a[0] = src_argb[3];
+ dst_a[1] = src_argb[7];
+ dst_a += 2;
+ src_argb += 8;
+ }
+ if (width & 1) {
+ dst_a[0] = src_argb[3];
+ }
+}
+
+void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
+ int i;
+ for (i = 0; i < width - 1; i += 2) {
+ dst[3] = src[0];
+ dst[7] = src[1];
+ dst += 8;
+ src += 2;
+ }
+ if (width & 1) {
+ dst[3] = src[0];
+ }
+}
+
+// Maximum temporary width for wrappers to process at a time, in pixels.
+#define MAXTWIDTH 2048
+
+#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
+ defined(HAS_I422TORGB565ROW_SSSE3)
+// row_win.cc has asm version, but GCC uses 2 step wrapper.
+void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB1555ROW_SSSE3)
+void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb1555 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB4444ROW_SSSE3)
+void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb4444 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_NV12TORGB565ROW_SSSE3)
+void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
+ ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
+ src_y += twidth;
+ src_uv += twidth;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_NV12TORGB24ROW_SSSE3)
+void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
+ src_y += twidth;
+ src_uv += twidth;
+ dst_rgb24 += twidth * 3;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_NV21TORGB24ROW_SSSE3)
+void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth);
+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
+ src_y += twidth;
+ src_vu += twidth;
+ dst_rgb24 += twidth * 3;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_NV12TORGB24ROW_AVX2)
+void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
+#if defined(HAS_ARGBTORGB24ROW_AVX2)
+ ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
+#else
+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
+#endif
+ src_y += twidth;
+ src_uv += twidth;
+ dst_rgb24 += twidth * 3;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_NV21TORGB24ROW_AVX2)
+void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
+#if defined(HAS_ARGBTORGB24ROW_AVX2)
+ ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
+#else
+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
+#endif
+ src_y += twidth;
+ src_vu += twidth;
+ dst_rgb24 += twidth * 3;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TORGB565ROW_AVX2)
+void I422ToRGB565Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+#if defined(HAS_ARGBTORGB565ROW_AVX2)
+ ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
+#else
+ ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
+#endif
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB1555ROW_AVX2)
+void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
+ ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
+#else
+ ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
+#endif
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb1555 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB4444ROW_AVX2)
+void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
+ ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
+#else
+ ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
+#endif
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb4444 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TORGB24ROW_AVX2)
+void I422ToRGB24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+#if defined(HAS_ARGBTORGB24ROW_AVX2)
+ ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
+#else
+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
+#endif
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_rgb24 += twidth * 3;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_NV12TORGB565ROW_AVX2)
+void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
+#if defined(HAS_ARGBTORGB565ROW_AVX2)
+ ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
+#else
+ ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
+#endif
+ src_y += twidth;
+ src_uv += twidth;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
+ float fsum = 0.f;
+ int i;
+#if defined(__clang__)
+#pragma clang loop vectorize_width(4)
+#endif
+ for (i = 0; i < width; ++i) {
+ float v = *src++;
+ fsum += v * v;
+ *dst++ = v * scale;
+ }
+ return fsum;
+}
+
+float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
+ float fmax = 0.f;
+ int i;
+ for (i = 0; i < width; ++i) {
+ float v = *src++;
+ float vs = v * scale;
+ fmax = (v > fmax) ? v : fmax;
+ *dst++ = vs;
+ }
+ return fmax;
+}
+
+void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ *dst++ = *src++ * scale;
+ }
+}
+
+void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ *dst++ =
+ (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8;
+ ++src;
+ }
+}
+
+// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
+void GaussCol_C(const uint16_t* src0,
+ const uint16_t* src1,
+ const uint16_t* src2,
+ const uint16_t* src3,
+ const uint16_t* src4,
+ uint32_t* dst,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_gcc.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_gcc.cc
new file mode 100644
index 0000000000..8d3cb81cec
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/row_gcc.cc
@@ -0,0 +1,6677 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC x86 and x64.
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
+
+#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
+
+// Constants for ARGB
+static const vec8 kARGBToY = {13, 65, 33, 0, 13, 65, 33, 0,
+ 13, 65, 33, 0, 13, 65, 33, 0};
+
+// JPeg full range.
+static const vec8 kARGBToYJ = {15, 75, 38, 0, 15, 75, 38, 0,
+ 15, 75, 38, 0, 15, 75, 38, 0};
+#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
+
+#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
+
+static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0,
+ 112, -74, -38, 0, 112, -74, -38, 0};
+
+static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0,
+ 127, -84, -43, 0, 127, -84, -43, 0};
+
+static const vec8 kARGBToV = {-18, -94, 112, 0, -18, -94, 112, 0,
+ -18, -94, 112, 0, -18, -94, 112, 0};
+
+static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0,
+ -20, -107, 127, 0, -20, -107, 127, 0};
+
+// Constants for BGRA
+static const vec8 kBGRAToY = {0, 33, 65, 13, 0, 33, 65, 13,
+ 0, 33, 65, 13, 0, 33, 65, 13};
+
+static const vec8 kBGRAToU = {0, -38, -74, 112, 0, -38, -74, 112,
+ 0, -38, -74, 112, 0, -38, -74, 112};
+
+static const vec8 kBGRAToV = {0, 112, -94, -18, 0, 112, -94, -18,
+ 0, 112, -94, -18, 0, 112, -94, -18};
+
+// Constants for ABGR
+static const vec8 kABGRToY = {33, 65, 13, 0, 33, 65, 13, 0,
+ 33, 65, 13, 0, 33, 65, 13, 0};
+
+static const vec8 kABGRToU = {-38, -74, 112, 0, -38, -74, 112, 0,
+ -38, -74, 112, 0, -38, -74, 112, 0};
+
+static const vec8 kABGRToV = {112, -94, -18, 0, 112, -94, -18, 0,
+ 112, -94, -18, 0, 112, -94, -18, 0};
+
+// Constants for RGBA.
+static const vec8 kRGBAToY = {0, 13, 65, 33, 0, 13, 65, 33,
+ 0, 13, 65, 33, 0, 13, 65, 33};
+
+static const vec8 kRGBAToU = {0, 112, -74, -38, 0, 112, -74, -38,
+ 0, 112, -74, -38, 0, 112, -74, -38};
+
+static const vec8 kRGBAToV = {0, -18, -94, 112, 0, -18, -94, 112,
+ 0, -18, -94, 112, 0, -18, -94, 112};
+
+static const uvec8 kAddY16 = {16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
+ 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u};
+
+// 7 bit fixed point 0.5.
+static const vec16 kAddYJ64 = {64, 64, 64, 64, 64, 64, 64, 64};
+
+static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
+
+static const uvec16 kAddUVJ128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u,
+ 0x8080u, 0x8080u, 0x8080u, 0x8080u};
+#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
+
+#ifdef HAS_RGB24TOARGBROW_SSSE3
+
+// Shuffle table for converting RGB24 to ARGB.
+static const uvec8 kShuffleMaskRGB24ToARGB = {
+ 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u};
+
+// Shuffle table for converting RAW to ARGB.
+static const uvec8 kShuffleMaskRAWToARGB = {2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u,
+ 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u};
+
+// Shuffle table for converting RAW to RGB24. First 8.
+static const uvec8 kShuffleMaskRAWToRGB24_0 = {
+ 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
+
+// Shuffle table for converting RAW to RGB24. Middle 8.
+static const uvec8 kShuffleMaskRAWToRGB24_1 = {
+ 2u, 7u, 6u, 5u, 10u, 9u, 8u, 13u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
+
+// Shuffle table for converting RAW to RGB24. Last 8.
+static const uvec8 kShuffleMaskRAWToRGB24_2 = {
+ 8u, 7u, 12u, 11u, 10u, 15u, 14u, 13u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
+
+// Shuffle table for converting ARGB to RGB24.
+static const uvec8 kShuffleMaskARGBToRGB24 = {
+ 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u};
+
+// Shuffle table for converting ARGB to RAW.
+static const uvec8 kShuffleMaskARGBToRAW = {
+ 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u};
+
+// Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4
+static const uvec8 kShuffleMaskARGBToRGB24_0 = {
+ 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u};
+
+// YUY2 shuf 16 Y to 32 Y.
+static const lvec8 kShuffleYUY2Y = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10,
+ 10, 12, 12, 14, 14, 0, 0, 2, 2, 4, 4,
+ 6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
+
+// YUY2 shuf 8 UV to 16 UV.
+static const lvec8 kShuffleYUY2UV = {1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9,
+ 11, 13, 15, 13, 15, 1, 3, 1, 3, 5, 7,
+ 5, 7, 9, 11, 9, 11, 13, 15, 13, 15};
+
+// UYVY shuf 16 Y to 32 Y.
+static const lvec8 kShuffleUYVYY = {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11,
+ 11, 13, 13, 15, 15, 1, 1, 3, 3, 5, 5,
+ 7, 7, 9, 9, 11, 11, 13, 13, 15, 15};
+
+// UYVY shuf 8 UV to 16 UV.
+static const lvec8 kShuffleUYVYUV = {0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8,
+ 10, 12, 14, 12, 14, 0, 2, 0, 2, 4, 6,
+ 4, 6, 8, 10, 8, 10, 12, 14, 12, 14};
+
+// NV21 shuf 8 VU to 16 UV.
+static const lvec8 kShuffleNV21 = {
+ 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
+ 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
+};
+#endif // HAS_RGB24TOARGBROW_SSSE3
+
+#ifdef HAS_J400TOARGBROW_SSE2
+void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0x18,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "lea 0x8(%0),%0 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklwd %%xmm0,%%xmm0 \n"
+ "punpckhwd %%xmm1,%%xmm1 \n"
+ "por %%xmm5,%%xmm0 \n"
+ "por %%xmm5,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm5");
+}
+#endif // HAS_J400TOARGBROW_SSE2
+
+#ifdef HAS_RGB24TOARGBROW_SSSE3
+void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n" // 0xff000000
+ "pslld $0x18,%%xmm5 \n"
+ "movdqa %3,%%xmm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm3 \n"
+ "lea 0x30(%0),%0 \n"
+ "movdqa %%xmm3,%%xmm2 \n"
+ "palignr $0x8,%%xmm1,%%xmm2 \n"
+ "pshufb %%xmm4,%%xmm2 \n"
+ "por %%xmm5,%%xmm2 \n"
+ "palignr $0xc,%%xmm0,%%xmm1 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "movdqu %%xmm2,0x20(%1) \n"
+ "por %%xmm5,%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "por %%xmm5,%%xmm1 \n"
+ "palignr $0x4,%%xmm3,%%xmm3 \n"
+ "pshufb %%xmm4,%%xmm3 \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "por %%xmm5,%%xmm3 \n"
+ "movdqu %%xmm3,0x30(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_rgb24), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskRGB24ToARGB) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+
+void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n" // 0xff000000
+ "pslld $0x18,%%xmm5 \n"
+ "movdqa %3,%%xmm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm3 \n"
+ "lea 0x30(%0),%0 \n"
+ "movdqa %%xmm3,%%xmm2 \n"
+ "palignr $0x8,%%xmm1,%%xmm2 \n"
+ "pshufb %%xmm4,%%xmm2 \n"
+ "por %%xmm5,%%xmm2 \n"
+ "palignr $0xc,%%xmm0,%%xmm1 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "movdqu %%xmm2,0x20(%1) \n"
+ "por %%xmm5,%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "por %%xmm5,%%xmm1 \n"
+ "palignr $0x4,%%xmm3,%%xmm3 \n"
+ "pshufb %%xmm4,%%xmm3 \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "por %%xmm5,%%xmm3 \n"
+ "movdqu %%xmm3,0x30(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskRAWToARGB) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+
+void RAWToRGB24Row_SSSE3(const uint8_t* src_raw,
+ uint8_t* dst_rgb24,
+ int width) {
+ asm volatile(
+ "movdqa %3,%%xmm3 \n"
+ "movdqa %4,%%xmm4 \n"
+ "movdqa %5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x4(%0),%%xmm1 \n"
+ "movdqu 0x8(%0),%%xmm2 \n"
+ "lea 0x18(%0),%0 \n"
+ "pshufb %%xmm3,%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm1 \n"
+ "pshufb %%xmm5,%%xmm2 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x8(%1) \n"
+ "movq %%xmm2,0x10(%1) \n"
+ "lea 0x18(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_rgb24), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskRAWToRGB24_0), // %3
+ "m"(kShuffleMaskRAWToRGB24_1), // %4
+ "m"(kShuffleMaskRAWToRGB24_2) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+
+void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "mov $0x1080108,%%eax \n"
+ "movd %%eax,%%xmm5 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "mov $0x20802080,%%eax \n"
+ "movd %%eax,%%xmm6 \n"
+ "pshufd $0x0,%%xmm6,%%xmm6 \n"
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "psllw $0xb,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psllw $0xa,%%xmm4 \n"
+ "psrlw $0x5,%%xmm4 \n"
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "psllw $0x8,%%xmm7 \n"
+ "sub %0,%1 \n"
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "pand %%xmm3,%%xmm1 \n"
+ "psllw $0xb,%%xmm2 \n"
+ "pmulhuw %%xmm5,%%xmm1 \n"
+ "pmulhuw %%xmm5,%%xmm2 \n"
+ "psllw $0x8,%%xmm1 \n"
+ "por %%xmm2,%%xmm1 \n"
+ "pand %%xmm4,%%xmm0 \n"
+ "pmulhuw %%xmm6,%%xmm0 \n"
+ "por %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "punpcklbw %%xmm0,%%xmm1 \n"
+ "punpckhbw %%xmm0,%%xmm2 \n"
+ "movdqu %%xmm1,0x00(%1,%0,2) \n"
+ "movdqu %%xmm2,0x10(%1,%0,2) \n"
+ "lea 0x10(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
+ "xmm6", "xmm7");
+}
+
+void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "mov $0x1080108,%%eax \n"
+ "movd %%eax,%%xmm5 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "mov $0x42004200,%%eax \n"
+ "movd %%eax,%%xmm6 \n"
+ "pshufd $0x0,%%xmm6,%%xmm6 \n"
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "psllw $0xb,%%xmm3 \n"
+ "movdqa %%xmm3,%%xmm4 \n"
+ "psrlw $0x6,%%xmm4 \n"
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "psllw $0x8,%%xmm7 \n"
+ "sub %0,%1 \n"
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "psllw $0x1,%%xmm1 \n"
+ "psllw $0xb,%%xmm2 \n"
+ "pand %%xmm3,%%xmm1 \n"
+ "pmulhuw %%xmm5,%%xmm2 \n"
+ "pmulhuw %%xmm5,%%xmm1 \n"
+ "psllw $0x8,%%xmm1 \n"
+ "por %%xmm2,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "pand %%xmm4,%%xmm0 \n"
+ "psraw $0x8,%%xmm2 \n"
+ "pmulhuw %%xmm6,%%xmm0 \n"
+ "pand %%xmm7,%%xmm2 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "punpcklbw %%xmm0,%%xmm1 \n"
+ "punpckhbw %%xmm0,%%xmm2 \n"
+ "movdqu %%xmm1,0x00(%1,%0,2) \n"
+ "movdqu %%xmm2,0x10(%1,%0,2) \n"
+ "lea 0x10(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
+ "xmm6", "xmm7");
+}
+
+void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "mov $0xf0f0f0f,%%eax \n"
+ "movd %%eax,%%xmm4 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "pslld $0x4,%%xmm5 \n"
+ "sub %0,%1 \n"
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "pand %%xmm4,%%xmm0 \n"
+ "pand %%xmm5,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "psllw $0x4,%%xmm1 \n"
+ "psrlw $0x4,%%xmm3 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm3,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm2,%%xmm0 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "movdqu %%xmm0,0x00(%1,%0,2) \n"
+ "movdqu %%xmm1,0x10(%1,%0,2) \n"
+ "lea 0x10(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+
+void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+
+ "movdqa %3,%%xmm6 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "pshufb %%xmm6,%%xmm0 \n"
+ "pshufb %%xmm6,%%xmm1 \n"
+ "pshufb %%xmm6,%%xmm2 \n"
+ "pshufb %%xmm6,%%xmm3 \n"
+ "movdqa %%xmm1,%%xmm4 \n"
+ "psrldq $0x4,%%xmm1 \n"
+ "pslldq $0xc,%%xmm4 \n"
+ "movdqa %%xmm2,%%xmm5 \n"
+ "por %%xmm4,%%xmm0 \n"
+ "pslldq $0x8,%%xmm5 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "por %%xmm5,%%xmm1 \n"
+ "psrldq $0x8,%%xmm2 \n"
+ "pslldq $0x4,%%xmm3 \n"
+ "por %%xmm3,%%xmm2 \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "movdqu %%xmm2,0x20(%1) \n"
+ "lea 0x30(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskARGBToRGB24) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+
+void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+
+ "movdqa %3,%%xmm6 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "pshufb %%xmm6,%%xmm0 \n"
+ "pshufb %%xmm6,%%xmm1 \n"
+ "pshufb %%xmm6,%%xmm2 \n"
+ "pshufb %%xmm6,%%xmm3 \n"
+ "movdqa %%xmm1,%%xmm4 \n"
+ "psrldq $0x4,%%xmm1 \n"
+ "pslldq $0xc,%%xmm4 \n"
+ "movdqa %%xmm2,%%xmm5 \n"
+ "por %%xmm4,%%xmm0 \n"
+ "pslldq $0x8,%%xmm5 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "por %%xmm5,%%xmm1 \n"
+ "psrldq $0x8,%%xmm2 \n"
+ "pslldq $0x4,%%xmm3 \n"
+ "por %%xmm3,%%xmm2 \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "movdqu %%xmm2,0x20(%1) \n"
+ "lea 0x30(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskARGBToRAW) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+
+#ifdef HAS_ARGBTORGB24ROW_AVX2
+// vpermd for 12+12 to 24
+static const lvec32 kPermdRGB24_AVX = {0, 1, 2, 4, 5, 6, 3, 7};
+
+void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm6 \n"
+ "vmovdqa %4,%%ymm7 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vpshufb %%ymm6,%%ymm0,%%ymm0 \n" // xxx0yyy0
+ "vpshufb %%ymm6,%%ymm1,%%ymm1 \n"
+ "vpshufb %%ymm6,%%ymm2,%%ymm2 \n"
+ "vpshufb %%ymm6,%%ymm3,%%ymm3 \n"
+ "vpermd %%ymm0,%%ymm7,%%ymm0 \n" // pack to 24 bytes
+ "vpermd %%ymm1,%%ymm7,%%ymm1 \n"
+ "vpermd %%ymm2,%%ymm7,%%ymm2 \n"
+ "vpermd %%ymm3,%%ymm7,%%ymm3 \n"
+ "vpermq $0x3f,%%ymm1,%%ymm4 \n" // combine 24 + 8
+ "vpor %%ymm4,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vpermq $0xf9,%%ymm1,%%ymm1 \n" // combine 16 + 16
+ "vpermq $0x4f,%%ymm2,%%ymm4 \n"
+ "vpor %%ymm4,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "vpermq $0xfe,%%ymm2,%%ymm2 \n" // combine 8 + 24
+ "vpermq $0x93,%%ymm3,%%ymm3 \n"
+ "vpor %%ymm3,%%ymm2,%%ymm2 \n"
+ "vmovdqu %%ymm2,0x40(%1) \n"
+ "lea 0x60(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskARGBToRGB24), // %3
+ "m"(kPermdRGB24_AVX) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif
+
+#ifdef HAS_ARGBTORGB24ROW_AVX512VBMI
+// Shuffle table for converting ARGBToRGB24
+static const ulvec8 kPermARGBToRGB24_0 = {
+ 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u,
+ 14u, 16u, 17u, 18u, 20u, 21u, 22u, 24u, 25u, 26u, 28u,
+ 29u, 30u, 32u, 33u, 34u, 36u, 37u, 38u, 40u, 41u};
+static const ulvec8 kPermARGBToRGB24_1 = {
+ 10u, 12u, 13u, 14u, 16u, 17u, 18u, 20u, 21u, 22u, 24u,
+ 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u, 36u, 37u, 38u,
+ 40u, 41u, 42u, 44u, 45u, 46u, 48u, 49u, 50u, 52u};
+static const ulvec8 kPermARGBToRGB24_2 = {
+ 21u, 22u, 24u, 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u,
+ 36u, 37u, 38u, 40u, 41u, 42u, 44u, 45u, 46u, 48u, 49u,
+ 50u, 52u, 53u, 54u, 56u, 57u, 58u, 60u, 61u, 62u};
+
+void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vmovdqa %3,%%ymm5 \n"
+ "vmovdqa %4,%%ymm6 \n"
+ "vmovdqa %5,%%ymm7 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vpermt2b %%ymm1,%%ymm5,%%ymm0 \n"
+ "vpermt2b %%ymm2,%%ymm6,%%ymm1 \n"
+ "vpermt2b %%ymm3,%%ymm7,%%ymm2 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "vmovdqu %%ymm2,0x40(%1) \n"
+ "lea 0x60(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kPermARGBToRGB24_0), // %3
+ "m"(kPermARGBToRGB24_1), // %4
+ "m"(kPermARGBToRGB24_2) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6", "xmm7");
+}
+#endif
+
+#ifdef HAS_ARGBTORAWROW_AVX2
+void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm6 \n"
+ "vmovdqa %4,%%ymm7 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vpshufb %%ymm6,%%ymm0,%%ymm0 \n" // xxx0yyy0
+ "vpshufb %%ymm6,%%ymm1,%%ymm1 \n"
+ "vpshufb %%ymm6,%%ymm2,%%ymm2 \n"
+ "vpshufb %%ymm6,%%ymm3,%%ymm3 \n"
+ "vpermd %%ymm0,%%ymm7,%%ymm0 \n" // pack to 24 bytes
+ "vpermd %%ymm1,%%ymm7,%%ymm1 \n"
+ "vpermd %%ymm2,%%ymm7,%%ymm2 \n"
+ "vpermd %%ymm3,%%ymm7,%%ymm3 \n"
+ "vpermq $0x3f,%%ymm1,%%ymm4 \n" // combine 24 + 8
+ "vpor %%ymm4,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vpermq $0xf9,%%ymm1,%%ymm1 \n" // combine 16 + 16
+ "vpermq $0x4f,%%ymm2,%%ymm4 \n"
+ "vpor %%ymm4,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "vpermq $0xfe,%%ymm2,%%ymm2 \n" // combine 8 + 24
+ "vpermq $0x93,%%ymm3,%%ymm3 \n"
+ "vpor %%ymm3,%%ymm2,%%ymm2 \n"
+ "vmovdqu %%ymm2,0x40(%1) \n"
+ "lea 0x60(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleMaskARGBToRAW), // %3
+ "m"(kPermdRGB24_AVX) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif
+
+void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "psrld $0x1b,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrld $0x1a,%%xmm4 \n"
+ "pslld $0x5,%%xmm4 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0xb,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "pslld $0x8,%%xmm0 \n"
+ "psrld $0x3,%%xmm1 \n"
+ "psrld $0x5,%%xmm2 \n"
+ "psrad $0x10,%%xmm0 \n"
+ "pand %%xmm3,%%xmm1 \n"
+ "pand %%xmm4,%%xmm2 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "por %%xmm2,%%xmm1 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "packssdw %%xmm0,%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+
+void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
+ const uint32_t dither4,
+ int width) {
+ asm volatile(
+ "movd %3,%%xmm6 \n"
+ "punpcklbw %%xmm6,%%xmm6 \n"
+ "movdqa %%xmm6,%%xmm7 \n"
+ "punpcklwd %%xmm6,%%xmm6 \n"
+ "punpckhwd %%xmm7,%%xmm7 \n"
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "psrld $0x1b,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrld $0x1a,%%xmm4 \n"
+ "pslld $0x5,%%xmm4 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0xb,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "paddusb %%xmm6,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "pslld $0x8,%%xmm0 \n"
+ "psrld $0x3,%%xmm1 \n"
+ "psrld $0x5,%%xmm2 \n"
+ "psrad $0x10,%%xmm0 \n"
+ "pand %%xmm3,%%xmm1 \n"
+ "pand %%xmm4,%%xmm2 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "por %%xmm2,%%xmm1 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "packssdw %%xmm0,%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(dither4) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+
+#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
+void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
+ const uint32_t dither4,
+ int width) {
+ asm volatile(
+ "vbroadcastss %3,%%xmm6 \n"
+ "vpunpcklbw %%xmm6,%%xmm6,%%xmm6 \n"
+ "vpermq $0xd8,%%ymm6,%%ymm6 \n"
+ "vpunpcklwd %%ymm6,%%ymm6,%%ymm6 \n"
+ "vpcmpeqb %%ymm3,%%ymm3,%%ymm3 \n"
+ "vpsrld $0x1b,%%ymm3,%%ymm3 \n"
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrld $0x1a,%%ymm4,%%ymm4 \n"
+ "vpslld $0x5,%%ymm4,%%ymm4 \n"
+ "vpslld $0xb,%%ymm3,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vpaddusb %%ymm6,%%ymm0,%%ymm0 \n"
+ "vpsrld $0x5,%%ymm0,%%ymm2 \n"
+ "vpsrld $0x3,%%ymm0,%%ymm1 \n"
+ "vpsrld $0x8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpand %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpor %%ymm2,%%ymm1,%%ymm1 \n"
+ "vpor %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "lea 0x20(%0),%0 \n"
+ "vmovdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(dither4) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ARGBTORGB565DITHERROW_AVX2
+
+void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrld $0x1b,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "pslld $0x5,%%xmm5 \n"
+ "movdqa %%xmm4,%%xmm6 \n"
+ "pslld $0xa,%%xmm6 \n"
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "pslld $0xf,%%xmm7 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm3 \n"
+ "psrad $0x10,%%xmm0 \n"
+ "psrld $0x3,%%xmm1 \n"
+ "psrld $0x6,%%xmm2 \n"
+ "psrld $0x9,%%xmm3 \n"
+ "pand %%xmm7,%%xmm0 \n"
+ "pand %%xmm4,%%xmm1 \n"
+ "pand %%xmm5,%%xmm2 \n"
+ "pand %%xmm6,%%xmm3 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm3,%%xmm2 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "packssdw %%xmm0,%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
+}
+
+void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psllw $0xc,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm3 \n"
+ "psrlw $0x8,%%xmm3 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pand %%xmm3,%%xmm0 \n"
+ "pand %%xmm4,%%xmm1 \n"
+ "psrlq $0x4,%%xmm0 \n"
+ "psrlq $0x8,%%xmm1 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
+}
+#endif // HAS_RGB24TOARGBROW_SSSE3
+
+/*
+
+ARGBToAR30Row:
+
+Red Blue
+With the 8 bit value in the upper bits of a short, vpmulhuw by (1024+4) will
+produce a 10 bit value in the low 10 bits of each 16 bit value. This is whats
+wanted for the blue channel. The red needs to be shifted 4 left, so multiply by
+(1024+4)*16 for red.
+
+Alpha Green
+Alpha and Green are already in the high bits so vpand can zero out the other
+bits, keeping just 2 upper bits of alpha and 8 bit green. The same multiplier
+could be used for Green - (1024+4) putting the 10 bit green in the lsb. Alpha
+would be a simple multiplier to shift it into position. It wants a gap of 10
+above the green. Green is 10 bits, so there are 6 bits in the low short. 4
+more are needed, so a multiplier of 4 gets the 2 bits into the upper 16 bits,
+and then a shift of 4 is a multiply of 16, so (4*16) = 64. Then shift the
+result left 10 to position the A and G channels.
+*/
+
+// Shuffle table for converting RAW to RGB24. Last 8.
+static const uvec8 kShuffleRB30 = {128u, 0u, 128u, 2u, 128u, 4u, 128u, 6u,
+ 128u, 8u, 128u, 10u, 128u, 12u, 128u, 14u};
+
+static const uvec8 kShuffleBR30 = {128u, 2u, 128u, 0u, 128u, 6u, 128u, 4u,
+ 128u, 10u, 128u, 8u, 128u, 14u, 128u, 12u};
+
+static const uint32_t kMulRB10 = 1028 * 16 * 65536 + 1028;
+static const uint32_t kMaskRB10 = 0x3ff003ff;
+static const uint32_t kMaskAG10 = 0xc000ff00;
+static const uint32_t kMulAG10 = 64 * 65536 + 1028;
+
+void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm2 \n" // shuffler for RB
+ "movd %4,%%xmm3 \n" // multipler for RB
+ "movd %5,%%xmm4 \n" // mask for R10 B10
+ "movd %6,%%xmm5 \n" // mask for AG
+ "movd %7,%%xmm6 \n" // multipler for AG
+ "pshufd $0x0,%%xmm3,%%xmm3 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "pshufd $0x0,%%xmm6,%%xmm6 \n"
+ "sub %0,%1 \n"
+
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n" // fetch 4 ARGB pixels
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pshufb %%xmm2,%%xmm1 \n" // R0B0
+ "pand %%xmm5,%%xmm0 \n" // A0G0
+ "pmulhuw %%xmm3,%%xmm1 \n" // X2 R16 X4 B10
+ "pmulhuw %%xmm6,%%xmm0 \n" // X10 A2 X10 G10
+ "pand %%xmm4,%%xmm1 \n" // X2 R10 X10 B10
+ "pslld $10,%%xmm0 \n" // A2 x10 G10 x10
+ "por %%xmm1,%%xmm0 \n" // A2 R10 G10 B10
+ "movdqu %%xmm0,(%1,%0) \n" // store 4 AR30 pixels
+ "add $0x10,%0 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleRB30), // %3
+ "m"(kMulRB10), // %4
+ "m"(kMaskRB10), // %5
+ "m"(kMaskAG10), // %6
+ "m"(kMulAG10) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+
+void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm2 \n" // shuffler for RB
+ "movd %4,%%xmm3 \n" // multipler for RB
+ "movd %5,%%xmm4 \n" // mask for R10 B10
+ "movd %6,%%xmm5 \n" // mask for AG
+ "movd %7,%%xmm6 \n" // multipler for AG
+ "pshufd $0x0,%%xmm3,%%xmm3 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "pshufd $0x0,%%xmm6,%%xmm6 \n"
+ "sub %0,%1 \n"
+
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n" // fetch 4 ABGR pixels
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pshufb %%xmm2,%%xmm1 \n" // R0B0
+ "pand %%xmm5,%%xmm0 \n" // A0G0
+ "pmulhuw %%xmm3,%%xmm1 \n" // X2 R16 X4 B10
+ "pmulhuw %%xmm6,%%xmm0 \n" // X10 A2 X10 G10
+ "pand %%xmm4,%%xmm1 \n" // X2 R10 X10 B10
+ "pslld $10,%%xmm0 \n" // A2 x10 G10 x10
+ "por %%xmm1,%%xmm0 \n" // A2 R10 G10 B10
+ "movdqu %%xmm0,(%1,%0) \n" // store 4 AR30 pixels
+ "add $0x10,%0 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleBR30), // %3 reversed shuffler
+ "m"(kMulRB10), // %4
+ "m"(kMaskRB10), // %5
+ "m"(kMaskAG10), // %6
+ "m"(kMulAG10) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+
+#ifdef HAS_ARGBTOAR30ROW_AVX2
+void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB
+ "vbroadcastss %4,%%ymm3 \n" // multipler for RB
+ "vbroadcastss %5,%%ymm4 \n" // mask for R10 B10
+ "vbroadcastss %6,%%ymm5 \n" // mask for AG
+ "vbroadcastss %7,%%ymm6 \n" // multipler for AG
+ "sub %0,%1 \n"
+
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n" // fetch 8 ARGB pixels
+ "vpshufb %%ymm2,%%ymm0,%%ymm1 \n" // R0B0
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n" // A0G0
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" // X2 R16 X4 B10
+ "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" // X10 A2 X10 G10
+ "vpand %%ymm4,%%ymm1,%%ymm1 \n" // X2 R10 X10 B10
+ "vpslld $10,%%ymm0,%%ymm0 \n" // A2 x10 G10 x10
+ "vpor %%ymm1,%%ymm0,%%ymm0 \n" // A2 R10 G10 B10
+ "vmovdqu %%ymm0,(%1,%0) \n" // store 8 AR30 pixels
+ "add $0x20,%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleRB30), // %3
+ "m"(kMulRB10), // %4
+ "m"(kMaskRB10), // %5
+ "m"(kMaskAG10), // %6
+ "m"(kMulAG10) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif
+
+#ifdef HAS_ABGRTOAR30ROW_AVX2
+void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB
+ "vbroadcastss %4,%%ymm3 \n" // multipler for RB
+ "vbroadcastss %5,%%ymm4 \n" // mask for R10 B10
+ "vbroadcastss %6,%%ymm5 \n" // mask for AG
+ "vbroadcastss %7,%%ymm6 \n" // multipler for AG
+ "sub %0,%1 \n"
+
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n" // fetch 8 ABGR pixels
+ "vpshufb %%ymm2,%%ymm0,%%ymm1 \n" // R0B0
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n" // A0G0
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" // X2 R16 X4 B10
+ "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" // X10 A2 X10 G10
+ "vpand %%ymm4,%%ymm1,%%ymm1 \n" // X2 R10 X10 B10
+ "vpslld $10,%%ymm0,%%ymm0 \n" // A2 x10 G10 x10
+ "vpor %%ymm1,%%ymm0,%%ymm0 \n" // A2 R10 G10 B10
+ "vmovdqu %%ymm0,(%1,%0) \n" // store 8 AR30 pixels
+ "add $0x20,%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleBR30), // %3 reversed shuffler
+ "m"(kMulRB10), // %4
+ "m"(kMaskRB10), // %5
+ "m"(kMaskAG10), // %6
+ "m"(kMulAG10) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif
+
+#ifdef HAS_ARGBTOYROW_SSSE3
+// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
+void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm3,%%xmm2 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToY), // %3
+ "m"(kAddY16) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_ARGBTOYROW_SSSE3
+
+#ifdef HAS_ARGBTOYJROW_SSSE3
+// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
+// Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
+void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm3,%%xmm2 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm2 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToYJ), // %3
+ "m"(kAddYJ64) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_ARGBTOYJROW_SSSE3
+
+#ifdef HAS_ARGBTOYROW_AVX2
+// vpermd for vphaddw + vpackuswb vpermd.
+static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
+
+// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
+void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vbroadcastf128 %4,%%ymm5 \n"
+ "vmovdqu %5,%%ymm6 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
+ "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
+ "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
+ "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
+ "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
+ "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToY), // %3
+ "m"(kAddY16), // %4
+ "m"(kPermdARGBToY_AVX) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif // HAS_ARGBTOYROW_AVX2
+
+#ifdef HAS_ARGBTOYJROW_AVX2
+// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
+void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vbroadcastf128 %4,%%ymm5 \n"
+ "vmovdqu %5,%%ymm6 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
+ "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
+ "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding.
+ "vpaddw %%ymm5,%%ymm2,%%ymm2 \n"
+ "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
+ "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
+ "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToYJ), // %3
+ "m"(kAddYJ64), // %4
+ "m"(kPermdARGBToY_AVX) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif // HAS_ARGBTOYJROW_AVX2
+
+#ifdef HAS_ARGBTOUVROW_SSSE3
+void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kARGBToV), // %5
+ "m"(kARGBToU), // %6
+ "m"(kAddUV128) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
+}
+#endif // HAS_ARGBTOUVROW_SSSE3
+
+#ifdef HAS_ARGBTOUVROW_AVX2
+// vpshufb for vphaddw + vpackuswb packed to shorts.
+static const lvec8 kShufARGBToUV_AVX = {
+ 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
+ 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15};
+void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "vbroadcastf128 %5,%%ymm5 \n"
+ "vbroadcastf128 %6,%%ymm6 \n"
+ "vbroadcastf128 %7,%%ymm7 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
+ "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
+ "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
+ "vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
+ "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
+
+ "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
+ "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
+ "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
+ "vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpsraw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm0,%%ymm0 \n"
+ "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpshufb %8,%%ymm0,%%ymm0 \n"
+ "vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
+
+ "vextractf128 $0x0,%%ymm0,(%1) \n"
+ "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kAddUV128), // %5
+ "m"(kARGBToV), // %6
+ "m"(kARGBToU), // %7
+ "m"(kShufARGBToUV_AVX) // %8
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ARGBTOUVROW_AVX2
+
+#ifdef HAS_ARGBTOUVJROW_AVX2
+void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "vbroadcastf128 %5,%%ymm5 \n"
+ "vbroadcastf128 %6,%%ymm6 \n"
+ "vbroadcastf128 %7,%%ymm7 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
+ "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
+ "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
+ "vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
+ "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
+
+ "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
+ "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
+ "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
+ "vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm0,%%ymm0 \n"
+ "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpshufb %8,%%ymm0,%%ymm0 \n"
+
+ "vextractf128 $0x0,%%ymm0,(%1) \n"
+ "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kAddUVJ128), // %5
+ "m"(kARGBToVJ), // %6
+ "m"(kARGBToUJ), // %7
+ "m"(kShufARGBToUV_AVX) // %8
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ARGBTOUVJROW_AVX2
+
+#ifdef HAS_ARGBTOUVJROW_SSSE3
+void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_argb)), // %4
+ "m"(kARGBToVJ), // %5
+ "m"(kARGBToUJ), // %6
+ "m"(kAddUVJ128) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
+}
+#endif // HAS_ARGBTOUVJROW_SSSE3
+
+#ifdef HAS_ARGBTOUV444ROW_SSSE3
+void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "movdqa %4,%%xmm3 \n"
+ "movdqa %5,%%xmm4 \n"
+ "movdqa %6,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm6 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm2 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm2 \n"
+ "packsswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "pmaddubsw %%xmm3,%%xmm0 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm2 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm2 \n"
+ "packsswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "lea 0x40(%0),%0 \n"
+ "movdqu %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "m"(kARGBToV), // %4
+ "m"(kARGBToU), // %5
+ "m"(kAddUV128) // %6
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6");
+}
+#endif // HAS_ARGBTOUV444ROW_SSSE3
+
+void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %4,%%xmm5 \n"
+ "movdqa %3,%%xmm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm3,%%xmm2 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_bgra), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kBGRAToY), // %3
+ "m"(kAddY16) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+
+void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
+ int src_stride_bgra,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_bgra0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_bgra)), // %4
+ "m"(kBGRAToV), // %5
+ "m"(kBGRAToU), // %6
+ "m"(kAddUV128) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
+}
+
+void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %4,%%xmm5 \n"
+ "movdqa %3,%%xmm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm3,%%xmm2 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kABGRToY), // %3
+ "m"(kAddY16) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+
+void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %4,%%xmm5 \n"
+ "movdqa %3,%%xmm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "phaddw %%xmm3,%%xmm2 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_rgba), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kRGBAToY), // %3
+ "m"(kAddY16) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+
+void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_abgr0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_abgr)), // %4
+ "m"(kABGRToV), // %5
+ "m"(kABGRToU), // %6
+ "m"(kAddUV128) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
+}
+
+void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
+ int src_stride_rgba,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "paddb %%xmm5,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_rgba0), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_rgba)), // %4
+ "m"(kRGBAToV), // %5
+ "m"(kRGBAToU), // %6
+ "m"(kAddUV128) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
+}
+
+#if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
+
+// Read 8 UV from 444
+#define READYUV444 \
+ "movq (%[u_buf]),%%xmm0 \n" \
+ "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x8(%[u_buf]),%[u_buf] \n" \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "movq (%[y_buf]),%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea 0x8(%[y_buf]),%[y_buf] \n"
+
+// Read 4 UV from 422, upsample to 8 UV
+#define READYUV422 \
+ "movd (%[u_buf]),%%xmm0 \n" \
+ "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x4(%[u_buf]),%[u_buf] \n" \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movq (%[y_buf]),%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea 0x8(%[y_buf]),%[y_buf] \n"
+
+// Read 4 UV from 422 10 bit, upsample to 8 UV
+// TODO(fbarchard): Consider shufb to replace pack/unpack
+// TODO(fbarchard): Consider pmulhuw to replace psraw
+// TODO(fbarchard): Consider pmullw to replace psllw and allow different bits.
+#define READYUV210 \
+ "movq (%[u_buf]),%%xmm0 \n" \
+ "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x8(%[u_buf]),%[u_buf] \n" \
+ "punpcklwd %%xmm1,%%xmm0 \n" \
+ "psraw $0x2,%%xmm0 \n" \
+ "packuswb %%xmm0,%%xmm0 \n" \
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movdqu (%[y_buf]),%%xmm4 \n" \
+ "psllw $0x6,%%xmm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n"
+
+// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
+#define READYUVA422 \
+ "movd (%[u_buf]),%%xmm0 \n" \
+ "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x4(%[u_buf]),%[u_buf] \n" \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movq (%[y_buf]),%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea 0x8(%[y_buf]),%[y_buf] \n" \
+ "movq (%[a_buf]),%%xmm5 \n" \
+ "lea 0x8(%[a_buf]),%[a_buf] \n"
+
+// Read 4 UV from NV12, upsample to 8 UV
+#define READNV12 \
+ "movq (%[uv_buf]),%%xmm0 \n" \
+ "lea 0x8(%[uv_buf]),%[uv_buf] \n" \
+ "punpcklwd %%xmm0,%%xmm0 \n" \
+ "movq (%[y_buf]),%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea 0x8(%[y_buf]),%[y_buf] \n"
+
+// Read 4 VU from NV21, upsample to 8 UV
+#define READNV21 \
+ "movq (%[vu_buf]),%%xmm0 \n" \
+ "lea 0x8(%[vu_buf]),%[vu_buf] \n" \
+ "pshufb %[kShuffleNV21], %%xmm0 \n" \
+ "movq (%[y_buf]),%%xmm4 \n" \
+ "punpcklbw %%xmm4,%%xmm4 \n" \
+ "lea 0x8(%[y_buf]),%[y_buf] \n"
+
+// Read 4 YUY2 with 8 Y and update 4 UV to 8 UV.
+#define READYUY2 \
+ "movdqu (%[yuy2_buf]),%%xmm4 \n" \
+ "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \
+ "movdqu (%[yuy2_buf]),%%xmm0 \n" \
+ "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \
+ "lea 0x10(%[yuy2_buf]),%[yuy2_buf] \n"
+
+// Read 4 UYVY with 8 Y and update 4 UV to 8 UV.
+#define READUYVY \
+ "movdqu (%[uyvy_buf]),%%xmm4 \n" \
+ "pshufb %[kShuffleUYVYY], %%xmm4 \n" \
+ "movdqu (%[uyvy_buf]),%%xmm0 \n" \
+ "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \
+ "lea 0x10(%[uyvy_buf]),%[uyvy_buf] \n"
+
+#if defined(__x86_64__)
+#define YUVTORGB_SETUP(yuvconstants) \
+ "movdqa (%[yuvconstants]),%%xmm8 \n" \
+ "movdqa 32(%[yuvconstants]),%%xmm9 \n" \
+ "movdqa 64(%[yuvconstants]),%%xmm10 \n" \
+ "movdqa 96(%[yuvconstants]),%%xmm11 \n" \
+ "movdqa 128(%[yuvconstants]),%%xmm12 \n" \
+ "movdqa 160(%[yuvconstants]),%%xmm13 \n" \
+ "movdqa 192(%[yuvconstants]),%%xmm14 \n"
+// Convert 8 pixels: 8 UV and 8 Y
+#define YUVTORGB16(yuvconstants) \
+ "movdqa %%xmm0,%%xmm1 \n" \
+ "movdqa %%xmm0,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm3 \n" \
+ "movdqa %%xmm11,%%xmm0 \n" \
+ "pmaddubsw %%xmm8,%%xmm1 \n" \
+ "psubw %%xmm1,%%xmm0 \n" \
+ "movdqa %%xmm12,%%xmm1 \n" \
+ "pmaddubsw %%xmm9,%%xmm2 \n" \
+ "psubw %%xmm2,%%xmm1 \n" \
+ "movdqa %%xmm13,%%xmm2 \n" \
+ "pmaddubsw %%xmm10,%%xmm3 \n" \
+ "psubw %%xmm3,%%xmm2 \n" \
+ "pmulhuw %%xmm14,%%xmm4 \n" \
+ "paddsw %%xmm4,%%xmm0 \n" \
+ "paddsw %%xmm4,%%xmm1 \n" \
+ "paddsw %%xmm4,%%xmm2 \n"
+#define YUVTORGB_REGS \
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
+
+#else
+#define YUVTORGB_SETUP(yuvconstants)
+// Convert 8 pixels: 8 UV and 8 Y
+#define YUVTORGB16(yuvconstants) \
+ "movdqa %%xmm0,%%xmm1 \n" \
+ "movdqa %%xmm0,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm3 \n" \
+ "movdqa 96(%[yuvconstants]),%%xmm0 \n" \
+ "pmaddubsw (%[yuvconstants]),%%xmm1 \n" \
+ "psubw %%xmm1,%%xmm0 \n" \
+ "movdqa 128(%[yuvconstants]),%%xmm1 \n" \
+ "pmaddubsw 32(%[yuvconstants]),%%xmm2 \n" \
+ "psubw %%xmm2,%%xmm1 \n" \
+ "movdqa 160(%[yuvconstants]),%%xmm2 \n" \
+ "pmaddubsw 64(%[yuvconstants]),%%xmm3 \n" \
+ "psubw %%xmm3,%%xmm2 \n" \
+ "pmulhuw 192(%[yuvconstants]),%%xmm4 \n" \
+ "paddsw %%xmm4,%%xmm0 \n" \
+ "paddsw %%xmm4,%%xmm1 \n" \
+ "paddsw %%xmm4,%%xmm2 \n"
+#define YUVTORGB_REGS
+#endif
+
+#define YUVTORGB(yuvconstants) \
+ YUVTORGB16(yuvconstants) \
+ "psraw $0x6,%%xmm0 \n" \
+ "psraw $0x6,%%xmm1 \n" \
+ "psraw $0x6,%%xmm2 \n" \
+ "packuswb %%xmm0,%%xmm0 \n" \
+ "packuswb %%xmm1,%%xmm1 \n" \
+ "packuswb %%xmm2,%%xmm2 \n"
+
+// Store 8 ARGB values.
+#define STOREARGB \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "punpcklbw %%xmm5,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm1 \n" \
+ "punpcklwd %%xmm2,%%xmm0 \n" \
+ "punpckhwd %%xmm2,%%xmm1 \n" \
+ "movdqu %%xmm0,(%[dst_argb]) \n" \
+ "movdqu %%xmm1,0x10(%[dst_argb]) \n" \
+ "lea 0x20(%[dst_argb]), %[dst_argb] \n"
+
+// Store 8 RGBA values.
+#define STORERGBA \
+ "pcmpeqb %%xmm5,%%xmm5 \n" \
+ "punpcklbw %%xmm2,%%xmm1 \n" \
+ "punpcklbw %%xmm0,%%xmm5 \n" \
+ "movdqa %%xmm5,%%xmm0 \n" \
+ "punpcklwd %%xmm1,%%xmm5 \n" \
+ "punpckhwd %%xmm1,%%xmm0 \n" \
+ "movdqu %%xmm5,(%[dst_rgba]) \n" \
+ "movdqu %%xmm0,0x10(%[dst_rgba]) \n" \
+ "lea 0x20(%[dst_rgba]),%[dst_rgba] \n"
+
+// Store 8 AR30 values.
+#define STOREAR30 \
+ "psraw $0x4,%%xmm0 \n" \
+ "psraw $0x4,%%xmm1 \n" \
+ "psraw $0x4,%%xmm2 \n" \
+ "pminsw %%xmm7,%%xmm0 \n" \
+ "pminsw %%xmm7,%%xmm1 \n" \
+ "pminsw %%xmm7,%%xmm2 \n" \
+ "pmaxsw %%xmm6,%%xmm0 \n" \
+ "pmaxsw %%xmm6,%%xmm1 \n" \
+ "pmaxsw %%xmm6,%%xmm2 \n" \
+ "psllw $0x4,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm3 \n" \
+ "punpcklwd %%xmm2,%%xmm0 \n" \
+ "punpckhwd %%xmm2,%%xmm3 \n" \
+ "movdqa %%xmm1,%%xmm2 \n" \
+ "punpcklwd %%xmm5,%%xmm1 \n" \
+ "punpckhwd %%xmm5,%%xmm2 \n" \
+ "pslld $0xa,%%xmm1 \n" \
+ "pslld $0xa,%%xmm2 \n" \
+ "por %%xmm1,%%xmm0 \n" \
+ "por %%xmm2,%%xmm3 \n" \
+ "movdqu %%xmm0,(%[dst_ar30]) \n" \
+ "movdqu %%xmm3,0x10(%[dst_ar30]) \n" \
+ "lea 0x20(%[dst_ar30]), %[dst_ar30] \n"
+
+void OMITFP I444ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV444
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+
+void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
+ "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
+ "sub %[u_buf],%[v_buf] \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV422
+ YUVTORGB(yuvconstants)
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpcklbw %%xmm2,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklwd %%xmm2,%%xmm0 \n"
+ "punpckhwd %%xmm2,%%xmm1 \n"
+ "pshufb %%xmm5,%%xmm0 \n"
+ "pshufb %%xmm6,%%xmm1 \n"
+ "palignr $0xc,%%xmm0,%%xmm1 \n"
+ "movq %%xmm0,(%[dst_rgb24]) \n"
+ "movdqu %%xmm1,0x8(%[dst_rgb24]) \n"
+ "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n"
+ "subl $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24]
+#if defined(__i386__)
+ [width]"+m"(width) // %[width]
+#else
+ [width]"+rm"(width) // %[width]
+#endif
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
+ [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+ );
+}
+
+void OMITFP I422ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV422
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+
+void OMITFP I422ToAR30Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants
+ "psrlw $14,%%xmm5 \n"
+ "psllw $4,%%xmm5 \n" // 2 alpha bits
+ "pxor %%xmm6,%%xmm6 \n"
+ "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
+ "psrlw $6,%%xmm7 \n" // 1023 for max
+
+ LABELALIGN
+ "1: \n"
+ READYUV422
+ YUVTORGB16(yuvconstants)
+ STOREAR30
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+
+// 10 bit YUV to ARGB
+void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV210
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+
+// 10 bit YUV to AR30
+void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $14,%%xmm5 \n"
+ "psllw $4,%%xmm5 \n" // 2 alpha bits
+ "pxor %%xmm6,%%xmm6 \n"
+ "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min
+ "psrlw $6,%%xmm7 \n" // 1023 for max
+
+ LABELALIGN
+ "1: \n"
+ READYUV210
+ YUVTORGB16(yuvconstants)
+ STOREAR30
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+
+#ifdef HAS_I422ALPHATOARGBROW_SSSE3
+void OMITFP I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // clang-format off
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUVA422
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ "subl $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [a_buf]"+r"(a_buf), // %[a_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+#if defined(__i386__)
+ [width]"+m"(width) // %[width]
+#else
+ [width]"+rm"(width) // %[width]
+#endif
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+ // clang-format on
+}
+#endif // HAS_I422ALPHATOARGBROW_SSSE3
+
+void OMITFP NV12ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // clang-format off
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READNV12
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [uv_buf]"+r"(uv_buf), // %[uv_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+ // clang-format on
+}
+
+void OMITFP NV21ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // clang-format off
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READNV21
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [vu_buf]"+r"(vu_buf), // %[vu_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleNV21]"m"(kShuffleNV21)
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+ // clang-format on
+}
+
+void OMITFP YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // clang-format off
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUY2
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleYUY2Y]"m"(kShuffleYUY2Y),
+ [kShuffleYUY2UV]"m"(kShuffleYUY2UV)
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+ // clang-format on
+}
+
+void OMITFP UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // clang-format off
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READUYVY
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleUYVYY]"m"(kShuffleUYVYY),
+ [kShuffleUYVYUV]"m"(kShuffleUYVYUV)
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+ // clang-format on
+}
+
+void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV422
+ YUVTORGB(yuvconstants)
+ STORERGBA
+ "sub $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+
+#endif // HAS_I422TOARGBROW_SSSE3
+
+// Read 16 UV from 444
+#define READYUV444_AVX2 \
+ "vmovdqu (%[u_buf]),%%xmm0 \n" \
+ "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x10(%[u_buf]),%[u_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n"
+
+// Read 8 UV from 422, upsample to 16 UV.
+#define READYUV422_AVX2 \
+ "vmovq (%[u_buf]),%%xmm0 \n" \
+ "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x8(%[u_buf]),%[u_buf] \n" \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n"
+
+// Read 8 UV from 210 10 bit, upsample to 16 UV
+// TODO(fbarchard): Consider vshufb to replace pack/unpack
+// TODO(fbarchard): Consider vunpcklpd to combine the 2 registers into 1.
+#define READYUV210_AVX2 \
+ "vmovdqu (%[u_buf]),%%xmm0 \n" \
+ "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x10(%[u_buf]),%[u_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
+ "vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpsraw $0x2,%%ymm0,%%ymm0 \n" \
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%ymm4 \n" \
+ "vpsllw $0x6,%%ymm4,%%ymm4 \n" \
+ "lea 0x20(%[y_buf]),%[y_buf] \n"
+
+// Read 8 UV from 422, upsample to 16 UV. With 16 Alpha.
+#define READYUVA422_AVX2 \
+ "vmovq (%[u_buf]),%%xmm0 \n" \
+ "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
+ "lea 0x8(%[u_buf]),%[u_buf] \n" \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n" \
+ "vmovdqu (%[a_buf]),%%xmm5 \n" \
+ "vpermq $0xd8,%%ymm5,%%ymm5 \n" \
+ "lea 0x10(%[a_buf]),%[a_buf] \n"
+
+// Read 8 UV from NV12, upsample to 16 UV.
+#define READNV12_AVX2 \
+ "vmovdqu (%[uv_buf]),%%xmm0 \n" \
+ "lea 0x10(%[uv_buf]),%[uv_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n"
+
+// Read 8 VU from NV21, upsample to 16 UV.
+#define READNV21_AVX2 \
+ "vmovdqu (%[vu_buf]),%%xmm0 \n" \
+ "lea 0x10(%[vu_buf]),%[vu_buf] \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \
+ "vmovdqu (%[y_buf]),%%xmm4 \n" \
+ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
+ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
+ "lea 0x10(%[y_buf]),%[y_buf] \n"
+
+// Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.
+#define READYUY2_AVX2 \
+ "vmovdqu (%[yuy2_buf]),%%ymm4 \n" \
+ "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \
+ "vmovdqu (%[yuy2_buf]),%%ymm0 \n" \
+ "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \
+ "lea 0x20(%[yuy2_buf]),%[yuy2_buf] \n"
+
+// Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV.
+#define READUYVY_AVX2 \
+ "vmovdqu (%[uyvy_buf]),%%ymm4 \n" \
+ "vpshufb %[kShuffleUYVYY], %%ymm4, %%ymm4 \n" \
+ "vmovdqu (%[uyvy_buf]),%%ymm0 \n" \
+ "vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \
+ "lea 0x20(%[uyvy_buf]),%[uyvy_buf] \n"
+
+#if defined(__x86_64__)
+#define YUVTORGB_SETUP_AVX2(yuvconstants) \
+ "vmovdqa (%[yuvconstants]),%%ymm8 \n" \
+ "vmovdqa 32(%[yuvconstants]),%%ymm9 \n" \
+ "vmovdqa 64(%[yuvconstants]),%%ymm10 \n" \
+ "vmovdqa 96(%[yuvconstants]),%%ymm11 \n" \
+ "vmovdqa 128(%[yuvconstants]),%%ymm12 \n" \
+ "vmovdqa 160(%[yuvconstants]),%%ymm13 \n" \
+ "vmovdqa 192(%[yuvconstants]),%%ymm14 \n"
+
+#define YUVTORGB16_AVX2(yuvconstants) \
+ "vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" \
+ "vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" \
+ "vpmaddubsw %%ymm8,%%ymm0,%%ymm0 \n" \
+ "vpsubw %%ymm2,%%ymm13,%%ymm2 \n" \
+ "vpsubw %%ymm1,%%ymm12,%%ymm1 \n" \
+ "vpsubw %%ymm0,%%ymm11,%%ymm0 \n" \
+ "vpmulhuw %%ymm14,%%ymm4,%%ymm4 \n" \
+ "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \
+ "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \
+ "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n"
+
+#define YUVTORGB_REGS_AVX2 \
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
+
+#else // Convert 16 pixels: 16 UV and 16 Y.
+
+#define YUVTORGB_SETUP_AVX2(yuvconstants)
+#define YUVTORGB16_AVX2(yuvconstants) \
+ "vpmaddubsw 64(%[yuvconstants]),%%ymm0,%%ymm2 \n" \
+ "vpmaddubsw 32(%[yuvconstants]),%%ymm0,%%ymm1 \n" \
+ "vpmaddubsw (%[yuvconstants]),%%ymm0,%%ymm0 \n" \
+ "vmovdqu 160(%[yuvconstants]),%%ymm3 \n" \
+ "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \
+ "vmovdqu 128(%[yuvconstants]),%%ymm3 \n" \
+ "vpsubw %%ymm1,%%ymm3,%%ymm1 \n" \
+ "vmovdqu 96(%[yuvconstants]),%%ymm3 \n" \
+ "vpsubw %%ymm0,%%ymm3,%%ymm0 \n" \
+ "vpmulhuw 192(%[yuvconstants]),%%ymm4,%%ymm4 \n" \
+ "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \
+ "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \
+ "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n"
+#define YUVTORGB_REGS_AVX2
+#endif
+
+#define YUVTORGB_AVX2(yuvconstants) \
+ YUVTORGB16_AVX2(yuvconstants) \
+ "vpsraw $0x6,%%ymm0,%%ymm0 \n" \
+ "vpsraw $0x6,%%ymm1,%%ymm1 \n" \
+ "vpsraw $0x6,%%ymm2,%%ymm2 \n" \
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \
+ "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n"
+
+// Store 16 ARGB values.
+#define STOREARGB_AVX2 \
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \
+ "vpermq $0xd8,%%ymm2,%%ymm2 \n" \
+ "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \
+ "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \
+ "vmovdqu %%ymm1,(%[dst_argb]) \n" \
+ "vmovdqu %%ymm0,0x20(%[dst_argb]) \n" \
+ "lea 0x40(%[dst_argb]), %[dst_argb] \n"
+
+// Store 16 AR30 values.
+#define STOREAR30_AVX2 \
+ "vpsraw $0x4,%%ymm0,%%ymm0 \n" \
+ "vpsraw $0x4,%%ymm1,%%ymm1 \n" \
+ "vpsraw $0x4,%%ymm2,%%ymm2 \n" \
+ "vpminsw %%ymm7,%%ymm0,%%ymm0 \n" \
+ "vpminsw %%ymm7,%%ymm1,%%ymm1 \n" \
+ "vpminsw %%ymm7,%%ymm2,%%ymm2 \n" \
+ "vpmaxsw %%ymm6,%%ymm0,%%ymm0 \n" \
+ "vpmaxsw %%ymm6,%%ymm1,%%ymm1 \n" \
+ "vpmaxsw %%ymm6,%%ymm2,%%ymm2 \n" \
+ "vpsllw $0x4,%%ymm2,%%ymm2 \n" \
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
+ "vpermq $0xd8,%%ymm2,%%ymm2 \n" \
+ "vpunpckhwd %%ymm2,%%ymm0,%%ymm3 \n" \
+ "vpunpcklwd %%ymm2,%%ymm0,%%ymm0 \n" \
+ "vpunpckhwd %%ymm5,%%ymm1,%%ymm2 \n" \
+ "vpunpcklwd %%ymm5,%%ymm1,%%ymm1 \n" \
+ "vpslld $0xa,%%ymm1,%%ymm1 \n" \
+ "vpslld $0xa,%%ymm2,%%ymm2 \n" \
+ "vpor %%ymm1,%%ymm0,%%ymm0 \n" \
+ "vpor %%ymm2,%%ymm3,%%ymm3 \n" \
+ "vmovdqu %%ymm0,(%[dst_ar30]) \n" \
+ "vmovdqu %%ymm3,0x20(%[dst_ar30]) \n" \
+ "lea 0x40(%[dst_ar30]), %[dst_ar30] \n"
+
+#ifdef HAS_I444TOARGBROW_AVX2
+// 16 pixels
+// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
+void OMITFP I444ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV444_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I444TOARGBROW_AVX2
+
+#if defined(HAS_I422TOARGBROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+void OMITFP I422ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV422_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I422TOARGBROW_AVX2
+
+#if defined(HAS_I422TOAR30ROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes).
+void OMITFP I422ToAR30Row_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
+ "vpsrlw $14,%%ymm5,%%ymm5 \n"
+ "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
+ "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
+ "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
+ "vpsrlw $6,%%ymm7,%%ymm7 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV422_AVX2
+ YUVTORGB16_AVX2(yuvconstants)
+ STOREAR30_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ );
+}
+#endif // HAS_I422TOAR30ROW_AVX2
+
+#if defined(HAS_I210TOARGBROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV210_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I210TOARGBROW_AVX2
+
+#if defined(HAS_I210TOAR30ROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes).
+void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf,
+ const uint16_t* u_buf,
+ const uint16_t* v_buf,
+ uint8_t* dst_ar30,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
+ "vpsrlw $14,%%ymm5,%%ymm5 \n"
+ "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits
+ "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min
+ "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max
+ "vpsrlw $6,%%ymm7,%%ymm7 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV210_AVX2
+ YUVTORGB16_AVX2(yuvconstants)
+ STOREAR30_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I210TOAR30ROW_AVX2
+
+#if defined(HAS_I422ALPHATOARGBROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
+void OMITFP I422AlphaToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // clang-format off
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUVA422_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "subl $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [a_buf]"+r"(a_buf), // %[a_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+#if defined(__i386__)
+ [width]"+m"(width) // %[width]
+#else
+ [width]"+rm"(width) // %[width]
+#endif
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+ // clang-format on
+}
+#endif // HAS_I422ALPHATOARGBROW_AVX2
+
+#if defined(HAS_I422TORGBAROW_AVX2)
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
+void OMITFP I422ToRGBARow_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "sub %[u_buf],%[v_buf] \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV422_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+
+ // Step 3: Weave into RGBA
+ "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpunpcklbw %%ymm0,%%ymm5,%%ymm2 \n"
+ "vpermq $0xd8,%%ymm2,%%ymm2 \n"
+ "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n"
+ "vpunpckhwd %%ymm1,%%ymm2,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%[dst_argb]) \n"
+ "vmovdqu %%ymm1,0x20(%[dst_argb]) \n"
+ "lea 0x40(%[dst_argb]),%[dst_argb] \n"
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+}
+#endif // HAS_I422TORGBAROW_AVX2
+
+#if defined(HAS_NV12TOARGBROW_AVX2)
+// 16 pixels.
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+void OMITFP NV12ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // clang-format off
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READNV12_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [uv_buf]"+r"(uv_buf), // %[uv_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+ // clang-format on
+}
+#endif // HAS_NV12TOARGBROW_AVX2
+
+#if defined(HAS_NV21TOARGBROW_AVX2)
+// 16 pixels.
+// 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+void OMITFP NV21ToARGBRow_AVX2(const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // clang-format off
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READNV21_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [vu_buf]"+r"(vu_buf), // %[vu_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleNV21]"m"(kShuffleNV21)
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+ // clang-format on
+}
+#endif // HAS_NV21TOARGBROW_AVX2
+
+#if defined(HAS_YUY2TOARGBROW_AVX2)
+// 16 pixels.
+// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
+void OMITFP YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // clang-format off
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUY2_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleYUY2Y]"m"(kShuffleYUY2Y),
+ [kShuffleYUY2UV]"m"(kShuffleYUY2UV)
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+ // clang-format on
+}
+#endif // HAS_YUY2TOARGBROW_AVX2
+
+#if defined(HAS_UYVYTOARGBROW_AVX2)
+// 16 pixels.
+// 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
+void OMITFP UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // clang-format off
+ asm volatile (
+ YUVTORGB_SETUP_AVX2(yuvconstants)
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ READUYVY_AVX2
+ YUVTORGB_AVX2(yuvconstants)
+ STOREARGB_AVX2
+ "sub $0x10,%[width] \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf]
+ [dst_argb]"+r"(dst_argb), // %[dst_argb]
+ [width]"+rm"(width) // %[width]
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleUYVYY]"m"(kShuffleUYVYY),
+ [kShuffleUYVYUV]"m"(kShuffleUYVYUV)
+ : "memory", "cc", YUVTORGB_REGS_AVX2
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+ );
+ // clang-format on
+}
+#endif // HAS_UYVYTOARGBROW_AVX2
+
+#ifdef HAS_I400TOARGBROW_SSE2
+void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
+ "movd %%eax,%%xmm2 \n"
+ "pshufd $0x0,%%xmm2,%%xmm2 \n"
+ "mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 *
+ // 16
+ "movd %%eax,%%xmm3 \n"
+ "pshufd $0x0,%%xmm3,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "pslld $0x18,%%xmm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
+ "movq (%0),%%xmm0 \n"
+ "lea 0x8(%0),%0 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "psubusw %%xmm3,%%xmm0 \n"
+ "psrlw $6, %%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+
+ // Step 2: Weave into ARGB
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklwd %%xmm0,%%xmm0 \n"
+ "punpckhwd %%xmm1,%%xmm1 \n"
+ "por %%xmm4,%%xmm0 \n"
+ "por %%xmm4,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(y_buf), // %0
+ "+r"(dst_argb), // %1
+ "+rm"(width) // %2
+ :
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
+}
+#endif // HAS_I400TOARGBROW_SSE2
+
+#ifdef HAS_I400TOARGBROW_AVX2
+// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
+// note: vpunpcklbw mutates and vpackuswb unmutates.
+void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "mov $0x4a354a35,%%eax \n" // 0488 = 1160 = 1.164 *
+ // 16
+ "vmovd %%eax,%%xmm2 \n"
+ "vbroadcastss %%xmm2,%%ymm2 \n"
+ "mov $0x4880488,%%eax \n" // 4a35 = 18997 = 1.164
+ "vmovd %%eax,%%xmm3 \n"
+ "vbroadcastss %%xmm3,%%ymm3 \n"
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpslld $0x18,%%ymm4,%%ymm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164
+ "vmovdqu (%0),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpsubusw %%ymm3,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x6,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n"
+ "vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpor %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpor %%ymm4,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(y_buf), // %0
+ "+r"(dst_argb), // %1
+ "+rm"(width) // %2
+ :
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
+}
+#endif // HAS_I400TOARGBROW_AVX2
+
+#ifdef HAS_MIRRORROW_SSSE3
+// Shuffle table for reversing the bytes.
+static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
+ 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
+
+void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
+ intptr_t temp_width = (intptr_t)(width);
+ asm volatile(
+
+ "movdqa %3,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu -0x10(%0,%2,1),%%xmm0 \n"
+ "pshufb %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(temp_width) // %2
+ : "m"(kShuffleMirror) // %3
+ : "memory", "cc", "xmm0", "xmm5");
+}
+#endif // HAS_MIRRORROW_SSSE3
+
+#ifdef HAS_MIRRORROW_AVX2
+void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ intptr_t temp_width = (intptr_t)(width);
+ asm volatile(
+
+ "vbroadcastf128 %3,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu -0x20(%0,%2,1),%%ymm0 \n"
+ "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpermq $0x4e,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(temp_width) // %2
+ : "m"(kShuffleMirror) // %3
+ : "memory", "cc", "xmm0", "xmm5");
+}
+#endif // HAS_MIRRORROW_AVX2
+
+#ifdef HAS_MIRRORUVROW_SSSE3
+// Shuffle table for reversing the bytes of UV channels.
+static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u,
+ 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
+void MirrorUVRow_SSSE3(const uint8_t* src,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ intptr_t temp_width = (intptr_t)(width);
+ asm volatile(
+ "movdqa %4,%%xmm1 \n"
+ "lea -0x10(%0,%3,2),%0 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "lea -0x10(%0),%0 \n"
+ "pshufb %%xmm1,%%xmm0 \n"
+ "movlpd %%xmm0,(%1) \n"
+ "movhpd %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $8,%3 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(temp_width) // %3
+ : "m"(kShuffleMirrorUV) // %4
+ : "memory", "cc", "xmm0", "xmm1");
+}
+#endif // HAS_MIRRORUVROW_SSSE3
+
+#ifdef HAS_ARGBMIRRORROW_SSE2
+
+void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ intptr_t temp_width = (intptr_t)(width);
+ asm volatile(
+
+ "lea -0x10(%0,%2,4),%0 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "pshufd $0x1b,%%xmm0,%%xmm0 \n"
+ "lea -0x10(%0),%0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(temp_width) // %2
+ :
+ : "memory", "cc", "xmm0");
+}
+#endif // HAS_ARGBMIRRORROW_SSE2
+
+#ifdef HAS_ARGBMIRRORROW_AVX2
+// Shuffle table for reversing the bytes.
+static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
+void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ intptr_t temp_width = (intptr_t)(width);
+ asm volatile(
+
+ "vmovdqu %3,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vpermd -0x20(%0,%2,4),%%ymm5,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(temp_width) // %2
+ : "m"(kARGBShuffleMirror_AVX2) // %3
+ : "memory", "cc", "xmm0", "xmm5");
+}
+#endif // HAS_ARGBMIRRORROW_AVX2
+
+#ifdef HAS_SPLITUVROW_AVX2
+void SplitUVRow_AVX2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm2 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm3 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm3,%%ymm2,%%ymm2 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm2,%%ymm2 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm2,0x00(%1,%2,1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+#endif // HAS_SPLITUVROW_AVX2
+
+#ifdef HAS_SPLITUVROW_SSE2
+void SplitUVRow_SSE2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm1,%%xmm3 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "psrlw $0x8,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm2,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+#endif // HAS_SPLITUVROW_SSE2
+
+#ifdef HAS_MERGEUVROW_AVX2
+void MergeUVRow_AVX2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(
+
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x00(%0,%1,1),%%ymm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n"
+ "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm2,(%2) \n"
+ "vextractf128 $0x0,%%ymm0,0x10(%2) \n"
+ "vextractf128 $0x1,%%ymm2,0x20(%2) \n"
+ "vextractf128 $0x1,%%ymm0,0x30(%2) \n"
+ "lea 0x40(%2),%2 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_MERGEUVROW_AVX2
+
+#ifdef HAS_MERGEUVROW_SSE2
+void MergeUVRow_SSE2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(
+
+ "sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm2 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "movdqu %%xmm2,0x10(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_MERGEUVROW_SSE2
+
+// Use scale to convert lsb formats to msb, depending how many bits there are:
+// 128 = 9 bits
+// 64 = 10 bits
+// 16 = 12 bits
+// 1 = 16 bits
+#ifdef HAS_MERGEUVROW_16_AVX2
+void MergeUVRow_16_AVX2(const uint16_t* src_u,
+ const uint16_t* src_v,
+ uint16_t* dst_uv,
+ int scale,
+ int width) {
+ // clang-format off
+ asm volatile (
+ "vmovd %4,%%xmm3 \n"
+ "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
+ "vbroadcastss %%xmm3,%%ymm3 \n"
+ "sub %0,%1 \n"
+
+ // 16 pixels per loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu (%0,%1,1),%%ymm1 \n"
+ "add $0x20,%0 \n"
+
+ "vpmullw %%ymm3,%%ymm0,%%ymm0 \n"
+ "vpmullw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpunpcklwd %%ymm1,%%ymm0,%%ymm2 \n" // mutates
+ "vpunpckhwd %%ymm1,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm2,(%2) \n"
+ "vextractf128 $0x0,%%ymm0,0x10(%2) \n"
+ "vextractf128 $0x1,%%ymm2,0x20(%2) \n"
+ "vextractf128 $0x1,%%ymm0,0x30(%2) \n"
+ "add $0x40,%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ : "r"(scale) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
+ // clang-format on
+}
+#endif // HAS_MERGEUVROW_AVX2
+
+// Use scale to convert lsb formats to msb, depending how many bits there are:
+// 128 = 9 bits
+// 64 = 10 bits
+// 16 = 12 bits
+// 1 = 16 bits
+#ifdef HAS_MULTIPLYROW_16_AVX2
+void MultiplyRow_16_AVX2(const uint16_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ // clang-format off
+ asm volatile (
+ "vmovd %3,%%xmm3 \n"
+ "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
+ "vbroadcastss %%xmm3,%%ymm3 \n"
+ "sub %0,%1 \n"
+
+ // 16 pixels per loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vpmullw %%ymm3,%%ymm0,%%ymm0 \n"
+ "vpmullw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%0,%1) \n"
+ "vmovdqu %%ymm1,0x20(%0,%1) \n"
+ "add $0x40,%0 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "r"(scale) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm3");
+ // clang-format on
+}
+#endif // HAS_MULTIPLYROW_16_AVX2
+
+// Use scale to convert lsb formats to msb, depending how many bits there are:
+// 32768 = 9 bits
+// 16384 = 10 bits
+// 4096 = 12 bits
+// 256 = 16 bits
+void Convert16To8Row_SSSE3(const uint16_t* src_y,
+ uint8_t* dst_y,
+ int scale,
+ int width) {
+ // clang-format off
+ asm volatile (
+ "movd %3,%%xmm2 \n"
+ "punpcklwd %%xmm2,%%xmm2 \n"
+ "pshufd $0x0,%%xmm2,%%xmm2 \n"
+
+ // 32 pixels per loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "add $0x20,%0 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "add $0x10,%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "r"(scale) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+ // clang-format on
+}
+
+#ifdef HAS_CONVERT16TO8ROW_AVX2
+void Convert16To8Row_AVX2(const uint16_t* src_y,
+ uint8_t* dst_y,
+ int scale,
+ int width) {
+ // clang-format off
+ asm volatile (
+ "vmovd %3,%%xmm2 \n"
+ "vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n"
+ "vbroadcastss %%xmm2,%%ymm2 \n"
+
+ // 32 pixels per loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "add $0x40,%0 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // mutates
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "add $0x20,%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "r"(scale) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+ // clang-format on
+}
+#endif // HAS_CONVERT16TO8ROW_AVX2
+
+// Use scale to convert to lsb formats depending how many bits there are:
+// 512 = 9 bits
+// 1024 = 10 bits
+// 4096 = 12 bits
+// TODO(fbarchard): reduce to SSE2
+void Convert8To16Row_SSE2(const uint8_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ // clang-format off
+ asm volatile (
+ "movd %3,%%xmm2 \n"
+ "punpcklwd %%xmm2,%%xmm2 \n"
+ "pshufd $0x0,%%xmm2,%%xmm2 \n"
+
+ // 32 pixels per loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm1 \n"
+ "add $0x10,%0 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "add $0x20,%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "r"(scale) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+ // clang-format on
+}
+
+#ifdef HAS_CONVERT8TO16ROW_AVX2
+void Convert8To16Row_AVX2(const uint8_t* src_y,
+ uint16_t* dst_y,
+ int scale,
+ int width) {
+ // clang-format off
+ asm volatile (
+ "vmovd %3,%%xmm2 \n"
+ "vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n"
+ "vbroadcastss %%xmm2,%%ymm2 \n"
+
+ // 32 pixels per loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "add $0x20,%0 \n"
+ "vpunpckhbw %%ymm0,%%ymm0,%%ymm1 \n"
+ "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "add $0x40,%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "r"(scale) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+ // clang-format on
+}
+#endif // HAS_CONVERT8TO16ROW_AVX2
+
+#ifdef HAS_SPLITRGBROW_SSSE3
+
+// Shuffle table for converting RGB to Planar.
+static const uvec8 kShuffleMaskRGBToR0 = {0u, 3u, 6u, 9u, 12u, 15u,
+ 128u, 128u, 128u, 128u, 128u, 128u,
+ 128u, 128u, 128u, 128u};
+static const uvec8 kShuffleMaskRGBToR1 = {128u, 128u, 128u, 128u, 128u, 128u,
+ 2u, 5u, 8u, 11u, 14u, 128u,
+ 128u, 128u, 128u, 128u};
+static const uvec8 kShuffleMaskRGBToR2 = {128u, 128u, 128u, 128u, 128u, 128u,
+ 128u, 128u, 128u, 128u, 128u, 1u,
+ 4u, 7u, 10u, 13u};
+
+static const uvec8 kShuffleMaskRGBToG0 = {1u, 4u, 7u, 10u, 13u, 128u,
+ 128u, 128u, 128u, 128u, 128u, 128u,
+ 128u, 128u, 128u, 128u};
+static const uvec8 kShuffleMaskRGBToG1 = {128u, 128u, 128u, 128u, 128u, 0u,
+ 3u, 6u, 9u, 12u, 15u, 128u,
+ 128u, 128u, 128u, 128u};
+static const uvec8 kShuffleMaskRGBToG2 = {128u, 128u, 128u, 128u, 128u, 128u,
+ 128u, 128u, 128u, 128u, 128u, 2u,
+ 5u, 8u, 11u, 14u};
+
+static const uvec8 kShuffleMaskRGBToB0 = {2u, 5u, 8u, 11u, 14u, 128u,
+ 128u, 128u, 128u, 128u, 128u, 128u,
+ 128u, 128u, 128u, 128u};
+static const uvec8 kShuffleMaskRGBToB1 = {128u, 128u, 128u, 128u, 128u, 1u,
+ 4u, 7u, 10u, 13u, 128u, 128u,
+ 128u, 128u, 128u, 128u};
+static const uvec8 kShuffleMaskRGBToB2 = {128u, 128u, 128u, 128u, 128u, 128u,
+ 128u, 128u, 128u, 128u, 0u, 3u,
+ 6u, 9u, 12u, 15u};
+
+void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "pshufb %5, %%xmm0 \n"
+ "pshufb %6, %%xmm1 \n"
+ "pshufb %7, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "pshufb %8, %%xmm0 \n"
+ "pshufb %9, %%xmm1 \n"
+ "pshufb %10, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "pshufb %11, %%xmm0 \n"
+ "pshufb %12, %%xmm1 \n"
+ "pshufb %13, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%3) \n"
+ "lea 0x10(%3),%3 \n"
+ "lea 0x30(%0),%0 \n"
+ "sub $0x10,%4 \n"
+ "jg 1b \n"
+ : "+r"(src_rgb), // %0
+ "+r"(dst_r), // %1
+ "+r"(dst_g), // %2
+ "+r"(dst_b), // %3
+ "+r"(width) // %4
+ : "m"(kShuffleMaskRGBToR0), // %5
+ "m"(kShuffleMaskRGBToR1), // %6
+ "m"(kShuffleMaskRGBToR2), // %7
+ "m"(kShuffleMaskRGBToG0), // %8
+ "m"(kShuffleMaskRGBToG1), // %9
+ "m"(kShuffleMaskRGBToG2), // %10
+ "m"(kShuffleMaskRGBToB0), // %11
+ "m"(kShuffleMaskRGBToB1), // %12
+ "m"(kShuffleMaskRGBToB2) // %13
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_SPLITRGBROW_SSSE3
+
+#ifdef HAS_MERGERGBROW_SSSE3
+
+// Shuffle table for converting RGB to Planar.
+static const uvec8 kShuffleMaskRToRGB0 = {0u, 128u, 128u, 1u, 128u, 128u,
+ 2u, 128u, 128u, 3u, 128u, 128u,
+ 4u, 128u, 128u, 5u};
+static const uvec8 kShuffleMaskGToRGB0 = {128u, 0u, 128u, 128u, 1u, 128u,
+ 128u, 2u, 128u, 128u, 3u, 128u,
+ 128u, 4u, 128u, 128u};
+static const uvec8 kShuffleMaskBToRGB0 = {128u, 128u, 0u, 128u, 128u, 1u,
+ 128u, 128u, 2u, 128u, 128u, 3u,
+ 128u, 128u, 4u, 128u};
+
+static const uvec8 kShuffleMaskGToRGB1 = {5u, 128u, 128u, 6u, 128u, 128u,
+ 7u, 128u, 128u, 8u, 128u, 128u,
+ 9u, 128u, 128u, 10u};
+static const uvec8 kShuffleMaskBToRGB1 = {128u, 5u, 128u, 128u, 6u, 128u,
+ 128u, 7u, 128u, 128u, 8u, 128u,
+ 128u, 9u, 128u, 128u};
+static const uvec8 kShuffleMaskRToRGB1 = {128u, 128u, 6u, 128u, 128u, 7u,
+ 128u, 128u, 8u, 128u, 128u, 9u,
+ 128u, 128u, 10u, 128u};
+
+static const uvec8 kShuffleMaskBToRGB2 = {10u, 128u, 128u, 11u, 128u, 128u,
+ 12u, 128u, 128u, 13u, 128u, 128u,
+ 14u, 128u, 128u, 15u};
+static const uvec8 kShuffleMaskRToRGB2 = {128u, 11u, 128u, 128u, 12u, 128u,
+ 128u, 13u, 128u, 128u, 14u, 128u,
+ 128u, 15u, 128u, 128u};
+static const uvec8 kShuffleMaskGToRGB2 = {128u, 128u, 11u, 128u, 128u, 12u,
+ 128u, 128u, 13u, 128u, 128u, 14u,
+ 128u, 128u, 15u, 128u};
+
+void MergeRGBRow_SSSE3(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "movdqu (%2),%%xmm2 \n"
+ "pshufb %5, %%xmm0 \n"
+ "pshufb %6, %%xmm1 \n"
+ "pshufb %7, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%3) \n"
+
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "movdqu (%2),%%xmm2 \n"
+ "pshufb %8, %%xmm0 \n"
+ "pshufb %9, %%xmm1 \n"
+ "pshufb %10, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,16(%3) \n"
+
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "movdqu (%2),%%xmm2 \n"
+ "pshufb %11, %%xmm0 \n"
+ "pshufb %12, %%xmm1 \n"
+ "pshufb %13, %%xmm2 \n"
+ "por %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,32(%3) \n"
+
+ "lea 0x10(%0),%0 \n"
+ "lea 0x10(%1),%1 \n"
+ "lea 0x10(%2),%2 \n"
+ "lea 0x30(%3),%3 \n"
+ "sub $0x10,%4 \n"
+ "jg 1b \n"
+ : "+r"(src_r), // %0
+ "+r"(src_g), // %1
+ "+r"(src_b), // %2
+ "+r"(dst_rgb), // %3
+ "+r"(width) // %4
+ : "m"(kShuffleMaskRToRGB0), // %5
+ "m"(kShuffleMaskGToRGB0), // %6
+ "m"(kShuffleMaskBToRGB0), // %7
+ "m"(kShuffleMaskRToRGB1), // %8
+ "m"(kShuffleMaskGToRGB1), // %9
+ "m"(kShuffleMaskBToRGB1), // %10
+ "m"(kShuffleMaskRToRGB2), // %11
+ "m"(kShuffleMaskGToRGB2), // %12
+ "m"(kShuffleMaskBToRGB2) // %13
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_MERGERGBROW_SSSE3
+
+#ifdef HAS_COPYROW_SSE2
+void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "test $0xf,%0 \n"
+ "jne 2f \n"
+ "test $0xf,%1 \n"
+ "jne 2f \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqa (%0),%%xmm0 \n"
+ "movdqa 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqa %%xmm0,(%1) \n"
+ "movdqa %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "jmp 9f \n"
+
+ LABELALIGN
+ "2: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 2b \n"
+
+ LABELALIGN "9: \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1");
+}
+#endif // HAS_COPYROW_SSE2
+
+#ifdef HAS_COPYROW_AVX
+void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x40,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1");
+}
+#endif // HAS_COPYROW_AVX
+
+#ifdef HAS_COPYROW_ERMS
+// Multiple of 1.
+void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) {
+ size_t width_tmp = (size_t)(width);
+ asm volatile(
+
+ "rep movsb \n"
+ : "+S"(src), // %0
+ "+D"(dst), // %1
+ "+c"(width_tmp) // %2
+ :
+ : "memory", "cc");
+}
+#endif // HAS_COPYROW_ERMS
+
+#ifdef HAS_ARGBCOPYALPHAROW_SSE2
+// width in pixels
+void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm0,%%xmm0 \n"
+ "pslld $0x18,%%xmm0 \n"
+ "pcmpeqb %%xmm1,%%xmm1 \n"
+ "psrld $0x8,%%xmm1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm2 \n"
+ "movdqu 0x10(%0),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqu (%1),%%xmm4 \n"
+ "movdqu 0x10(%1),%%xmm5 \n"
+ "pand %%xmm0,%%xmm2 \n"
+ "pand %%xmm0,%%xmm3 \n"
+ "pand %%xmm1,%%xmm4 \n"
+ "pand %%xmm1,%%xmm5 \n"
+ "por %%xmm4,%%xmm2 \n"
+ "por %%xmm5,%%xmm3 \n"
+ "movdqu %%xmm2,(%1) \n"
+ "movdqu %%xmm3,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_ARGBCOPYALPHAROW_SSE2
+
+#ifdef HAS_ARGBCOPYALPHAROW_AVX2
+// width in pixels
+void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpsrld $0x8,%%ymm0,%%ymm0 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm1 \n"
+ "vmovdqu 0x20(%0),%%ymm2 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpblendvb %%ymm0,(%1),%%ymm1,%%ymm1 \n"
+ "vpblendvb %%ymm0,0x20(%1),%%ymm2,%%ymm2 \n"
+ "vmovdqu %%ymm1,(%1) \n"
+ "vmovdqu %%ymm2,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_ARGBCOPYALPHAROW_AVX2
+
+#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
+// width in pixels
+void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0), %%xmm0 \n"
+ "movdqu 0x10(%0), %%xmm1 \n"
+ "lea 0x20(%0), %0 \n"
+ "psrld $0x18, %%xmm0 \n"
+ "psrld $0x18, %%xmm1 \n"
+ "packssdw %%xmm1, %%xmm0 \n"
+ "packuswb %%xmm0, %%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1), %1 \n"
+ "sub $0x8, %2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_a), // %1
+ "+rm"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1");
+}
+#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
+
+#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
+static const uvec8 kShuffleAlphaShort_AVX2 = {
+ 3u, 128u, 128u, 128u, 7u, 128u, 128u, 128u,
+ 11u, 128u, 128u, 128u, 15u, 128u, 128u, 128u};
+
+void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
+ asm volatile(
+ "vmovdqa %3,%%ymm4 \n"
+ "vbroadcastf128 %4,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0), %%ymm0 \n"
+ "vmovdqu 0x20(%0), %%ymm1 \n"
+ "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // vpsrld $0x18, %%ymm0
+ "vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
+ "vmovdqu 0x40(%0), %%ymm2 \n"
+ "vmovdqu 0x60(%0), %%ymm3 \n"
+ "lea 0x80(%0), %0 \n"
+ "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates
+ "vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
+ "vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates
+ "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
+ "vpermd %%ymm0,%%ymm4,%%ymm0 \n" // unmutate.
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20, %2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_a), // %1
+ "+rm"(width) // %2
+ : "m"(kPermdARGBToY_AVX), // %3
+ "m"(kShuffleAlphaShort_AVX2) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_ARGBEXTRACTALPHAROW_AVX2
+
+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
+// width in pixels
+void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm0,%%xmm0 \n"
+ "pslld $0x18,%%xmm0 \n"
+ "pcmpeqb %%xmm1,%%xmm1 \n"
+ "psrld $0x8,%%xmm1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm2 \n"
+ "lea 0x8(%0),%0 \n"
+ "punpcklbw %%xmm2,%%xmm2 \n"
+ "punpckhwd %%xmm2,%%xmm3 \n"
+ "punpcklwd %%xmm2,%%xmm2 \n"
+ "movdqu (%1),%%xmm4 \n"
+ "movdqu 0x10(%1),%%xmm5 \n"
+ "pand %%xmm0,%%xmm2 \n"
+ "pand %%xmm0,%%xmm3 \n"
+ "pand %%xmm1,%%xmm4 \n"
+ "pand %%xmm1,%%xmm5 \n"
+ "por %%xmm4,%%xmm2 \n"
+ "por %%xmm5,%%xmm3 \n"
+ "movdqu %%xmm2,(%1) \n"
+ "movdqu %%xmm3,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2
+
+#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
+// width in pixels
+void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpsrld $0x8,%%ymm0,%%ymm0 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vpmovzxbd (%0),%%ymm1 \n"
+ "vpmovzxbd 0x8(%0),%%ymm2 \n"
+ "lea 0x10(%0),%0 \n"
+ "vpslld $0x18,%%ymm1,%%ymm1 \n"
+ "vpslld $0x18,%%ymm2,%%ymm2 \n"
+ "vpblendvb %%ymm0,(%1),%%ymm1,%%ymm1 \n"
+ "vpblendvb %%ymm0,0x20(%1),%%ymm2,%%ymm2 \n"
+ "vmovdqu %%ymm1,(%1) \n"
+ "vmovdqu %%ymm2,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2
+
+#ifdef HAS_SETROW_X86
+void SetRow_X86(uint8_t* dst, uint8_t v8, int width) {
+ size_t width_tmp = (size_t)(width >> 2);
+ const uint32_t v32 = v8 * 0x01010101u; // Duplicate byte to all bytes.
+ asm volatile(
+
+ "rep stosl \n"
+ : "+D"(dst), // %0
+ "+c"(width_tmp) // %1
+ : "a"(v32) // %2
+ : "memory", "cc");
+}
+
+void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) {
+ size_t width_tmp = (size_t)(width);
+ asm volatile(
+
+ "rep stosb \n"
+ : "+D"(dst), // %0
+ "+c"(width_tmp) // %1
+ : "a"(v8) // %2
+ : "memory", "cc");
+}
+
+void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) {
+ size_t width_tmp = (size_t)(width);
+ asm volatile(
+
+ "rep stosl \n"
+ : "+D"(dst_argb), // %0
+ "+c"(width_tmp) // %1
+ : "a"(v32) // %2
+ : "memory", "cc");
+}
+#endif // HAS_SETROW_X86
+
+#ifdef HAS_YUY2TOYROW_SSE2
+void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+
+void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "pavgb %%xmm3,%%xmm1 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(stride_yuy2)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+
+void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+
+void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1");
+}
+
+void UYVYToUVRow_SSE2(const uint8_t* src_uyvy,
+ int stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "pavgb %%xmm3,%%xmm1 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(stride_uyvy)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+
+void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrlw $0x8,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+#endif // HAS_YUY2TOYROW_SSE2
+
+#ifdef HAS_YUY2TOYROW_AVX2
+void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+
+void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1,(%1) \n"
+ "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(stride_yuy2)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+
+void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1,(%1) \n"
+ "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+
+void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
+ int stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1,(%1) \n"
+ "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((intptr_t)(stride_uyvy)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+
+void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrlw $0x8,%%ymm5,%%ymm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm1,%%ymm1 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vextractf128 $0x0,%%ymm1,(%1) \n"
+ "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+#endif // HAS_YUY2TOYROW_AVX2
+
+#ifdef HAS_ARGBBLENDROW_SSSE3
+// Shuffle table for isolating alpha.
+static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
+ 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
+
+// Blend 8 pixels at a time
+void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "psrlw $0xf,%%xmm7 \n"
+ "pcmpeqb %%xmm6,%%xmm6 \n"
+ "psrlw $0x8,%%xmm6 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psllw $0x8,%%xmm5 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "pslld $0x18,%%xmm4 \n"
+ "sub $0x4,%3 \n"
+ "jl 49f \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "40: \n"
+ "movdqu (%0),%%xmm3 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm3,%%xmm0 \n"
+ "pxor %%xmm4,%%xmm3 \n"
+ "movdqu (%1),%%xmm2 \n"
+ "pshufb %4,%%xmm3 \n"
+ "pand %%xmm6,%%xmm2 \n"
+ "paddw %%xmm7,%%xmm3 \n"
+ "pmullw %%xmm3,%%xmm2 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "lea 0x10(%1),%1 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "por %%xmm4,%%xmm0 \n"
+ "pmullw %%xmm3,%%xmm1 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "paddusb %%xmm2,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jge 40b \n"
+
+ "49: \n"
+ "add $0x3,%3 \n"
+ "jl 99f \n"
+
+ // 1 pixel loop.
+ "91: \n"
+ "movd (%0),%%xmm3 \n"
+ "lea 0x4(%0),%0 \n"
+ "movdqa %%xmm3,%%xmm0 \n"
+ "pxor %%xmm4,%%xmm3 \n"
+ "movd (%1),%%xmm2 \n"
+ "pshufb %4,%%xmm3 \n"
+ "pand %%xmm6,%%xmm2 \n"
+ "paddw %%xmm7,%%xmm3 \n"
+ "pmullw %%xmm3,%%xmm2 \n"
+ "movd (%1),%%xmm1 \n"
+ "lea 0x4(%1),%1 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "por %%xmm4,%%xmm0 \n"
+ "pmullw %%xmm3,%%xmm1 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "paddusb %%xmm2,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movd %%xmm0,(%2) \n"
+ "lea 0x4(%2),%2 \n"
+ "sub $0x1,%3 \n"
+ "jge 91b \n"
+ "99: \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ : "m"(kShuffleAlpha) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ARGBBLENDROW_SSSE3
+
+#ifdef HAS_BLENDPLANEROW_SSSE3
+// Blend 8 pixels at a time.
+// unsigned version of math
+// =((A2*C2)+(B2*(255-C2))+255)/256
+// signed version of math
+// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
+void BlendPlaneRow_SSSE3(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psllw $0x8,%%xmm5 \n"
+ "mov $0x80808080,%%eax \n"
+ "movd %%eax,%%xmm6 \n"
+ "pshufd $0x0,%%xmm6,%%xmm6 \n"
+ "mov $0x807f807f,%%eax \n"
+ "movd %%eax,%%xmm7 \n"
+ "pshufd $0x0,%%xmm7,%%xmm7 \n"
+ "sub %2,%0 \n"
+ "sub %2,%1 \n"
+ "sub %2,%3 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movq (%2),%%xmm0 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "pxor %%xmm5,%%xmm0 \n"
+ "movq (%0,%2,1),%%xmm1 \n"
+ "movq (%1,%2,1),%%xmm2 \n"
+ "punpcklbw %%xmm2,%%xmm1 \n"
+ "psubb %%xmm6,%%xmm1 \n"
+ "pmaddubsw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm7,%%xmm0 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,(%3,%2,1) \n"
+ "lea 0x8(%2),%2 \n"
+ "sub $0x8,%4 \n"
+ "jg 1b \n"
+ : "+r"(src0), // %0
+ "+r"(src1), // %1
+ "+r"(alpha), // %2
+ "+r"(dst), // %3
+ "+rm"(width) // %4
+ ::"memory",
+ "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7");
+}
+#endif // HAS_BLENDPLANEROW_SSSE3
+
+#ifdef HAS_BLENDPLANEROW_AVX2
+// Blend 32 pixels at a time.
+// unsigned version of math
+// =((A2*C2)+(B2*(255-C2))+255)/256
+// signed version of math
+// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
+void BlendPlaneRow_AVX2(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
+ int width) {
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsllw $0x8,%%ymm5,%%ymm5 \n"
+ "mov $0x80808080,%%eax \n"
+ "vmovd %%eax,%%xmm6 \n"
+ "vbroadcastss %%xmm6,%%ymm6 \n"
+ "mov $0x807f807f,%%eax \n"
+ "vmovd %%eax,%%xmm7 \n"
+ "vbroadcastss %%xmm7,%%ymm7 \n"
+ "sub %2,%0 \n"
+ "sub %2,%1 \n"
+ "sub %2,%3 \n"
+
+ // 32 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%2),%%ymm0 \n"
+ "vpunpckhbw %%ymm0,%%ymm0,%%ymm3 \n"
+ "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpxor %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpxor %%ymm5,%%ymm0,%%ymm0 \n"
+ "vmovdqu (%0,%2,1),%%ymm1 \n"
+ "vmovdqu (%1,%2,1),%%ymm2 \n"
+ "vpunpckhbw %%ymm2,%%ymm1,%%ymm4 \n"
+ "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n"
+ "vpsubb %%ymm6,%%ymm4,%%ymm4 \n"
+ "vpsubb %%ymm6,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpmaddubsw %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm7,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm7,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm3,%%ymm3 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm3,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%3,%2,1) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x20,%4 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src0), // %0
+ "+r"(src1), // %1
+ "+r"(alpha), // %2
+ "+r"(dst), // %3
+ "+rm"(width) // %4
+ ::"memory",
+ "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_BLENDPLANEROW_AVX2
+
+#ifdef HAS_ARGBATTENUATEROW_SSSE3
+// Shuffle table duplicating alpha
+static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u,
+ 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u};
+static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
+ 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u};
+// Attenuate 4 pixels at a time.
+void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "pslld $0x18,%%xmm3 \n"
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "punpcklbw %%xmm1,%%xmm1 \n"
+ "pmulhuw %%xmm1,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "punpckhbw %%xmm2,%%xmm2 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "lea 0x10(%0),%0 \n"
+ "pand %%xmm3,%%xmm2 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleAlpha0), // %3
+ "m"(kShuffleAlpha1) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_ARGBATTENUATEROW_SSSE3
+
+#ifdef HAS_ARGBATTENUATEROW_AVX2
+// Shuffle table duplicating alpha.
+static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u,
+ 128u, 128u, 14u, 15u, 14u, 15u,
+ 14u, 15u, 128u, 128u};
+// Attenuate 8 pixels at a time.
+void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpslld $0x18,%%ymm5,%%ymm5 \n"
+ "sub %0,%1 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm6 \n"
+ "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
+ "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
+ "vpshufb %%ymm4,%%ymm0,%%ymm2 \n"
+ "vpshufb %%ymm4,%%ymm1,%%ymm3 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpand %%ymm5,%%ymm6,%%ymm6 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpor %%ymm6,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,0x00(%0,%1,1) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleAlpha_AVX2) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif // HAS_ARGBATTENUATEROW_AVX2
+
+#ifdef HAS_ARGBUNATTENUATEROW_SSE2
+// Unattenuate 4 pixels at a time.
+void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ uintptr_t alpha;
+ asm volatile(
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movzb 0x03(%0),%3 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "movd 0x00(%4,%3,4),%%xmm2 \n"
+ "movzb 0x07(%0),%3 \n"
+ "movd 0x00(%4,%3,4),%%xmm3 \n"
+ "pshuflw $0x40,%%xmm2,%%xmm2 \n"
+ "pshuflw $0x40,%%xmm3,%%xmm3 \n"
+ "movlhps %%xmm3,%%xmm2 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "movzb 0x0b(%0),%3 \n"
+ "punpckhbw %%xmm1,%%xmm1 \n"
+ "movd 0x00(%4,%3,4),%%xmm2 \n"
+ "movzb 0x0f(%0),%3 \n"
+ "movd 0x00(%4,%3,4),%%xmm3 \n"
+ "pshuflw $0x40,%%xmm2,%%xmm2 \n"
+ "pshuflw $0x40,%%xmm3,%%xmm3 \n"
+ "movlhps %%xmm3,%%xmm2 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width), // %2
+ "=&r"(alpha) // %3
+ : "r"(fixed_invtbl8) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_ARGBUNATTENUATEROW_SSE2
+
+#ifdef HAS_ARGBUNATTENUATEROW_AVX2
+// Shuffle table duplicating alpha.
+static const uvec8 kUnattenShuffleAlpha_AVX2 = {
+ 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u};
+// Unattenuate 8 pixels at a time.
+void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ uintptr_t alpha;
+ asm volatile(
+ "sub %0,%1 \n"
+ "vbroadcastf128 %5,%%ymm5 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ // replace VPGATHER
+ "movzb 0x03(%0),%3 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm0 \n"
+ "movzb 0x07(%0),%3 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm1 \n"
+ "movzb 0x0b(%0),%3 \n"
+ "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm2 \n"
+ "movzb 0x0f(%0),%3 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm3 \n"
+ "movzb 0x13(%0),%3 \n"
+ "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm0 \n"
+ "movzb 0x17(%0),%3 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm1 \n"
+ "movzb 0x1b(%0),%3 \n"
+ "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm2 \n"
+ "movzb 0x1f(%0),%3 \n"
+ "vmovd 0x00(%4,%3,4),%%xmm3 \n"
+ "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n"
+ "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n"
+ "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n"
+ "vinserti128 $0x1,%%xmm0,%%ymm3,%%ymm3 \n"
+ // end of VPGATHER
+
+ "vmovdqu (%0),%%ymm6 \n"
+ "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
+ "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
+ "vpunpcklwd %%ymm3,%%ymm3,%%ymm2 \n"
+ "vpunpckhwd %%ymm3,%%ymm3,%%ymm3 \n"
+ "vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
+ "vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,0x00(%0,%1,1) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width), // %2
+ "=&r"(alpha) // %3
+ : "r"(fixed_invtbl8), // %4
+ "m"(kUnattenShuffleAlpha_AVX2) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ARGBUNATTENUATEROW_AVX2
+
+#ifdef HAS_ARGBGRAYROW_SSSE3
+// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
+void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "movdqu 0x10(%0),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "psrld $0x18,%%xmm2 \n"
+ "psrld $0x18,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm3 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "punpcklbw %%xmm2,%%xmm3 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklwd %%xmm3,%%xmm0 \n"
+ "punpckhwd %%xmm3,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kARGBToYJ), // %3
+ "m"(kAddYJ64) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_ARGBGRAYROW_SSSE3
+
+#ifdef HAS_ARGBSEPIAROW_SSSE3
+// b = (r * 35 + g * 68 + b * 17) >> 7
+// g = (r * 45 + g * 88 + b * 22) >> 7
+// r = (r * 50 + g * 98 + b * 24) >> 7
+// Constant for ARGB color to sepia tone
+static const vec8 kARGBToSepiaB = {17, 68, 35, 0, 17, 68, 35, 0,
+ 17, 68, 35, 0, 17, 68, 35, 0};
+
+static const vec8 kARGBToSepiaG = {22, 88, 45, 0, 22, 88, 45, 0,
+ 22, 88, 45, 0, 22, 88, 45, 0};
+
+static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0,
+ 24, 98, 50, 0, 24, 98, 50, 0};
+
+// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
+void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) {
+ asm volatile(
+ "movdqa %2,%%xmm2 \n"
+ "movdqa %3,%%xmm3 \n"
+ "movdqa %4,%%xmm4 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm6 \n"
+ "pmaddubsw %%xmm2,%%xmm0 \n"
+ "pmaddubsw %%xmm2,%%xmm6 \n"
+ "phaddw %%xmm6,%%xmm0 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movdqu (%0),%%xmm5 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm5 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm5 \n"
+ "psrlw $0x7,%%xmm5 \n"
+ "packuswb %%xmm5,%%xmm5 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "movdqu (%0),%%xmm5 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm5 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm5 \n"
+ "psrlw $0x7,%%xmm5 \n"
+ "packuswb %%xmm5,%%xmm5 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "psrld $0x18,%%xmm6 \n"
+ "psrld $0x18,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "punpcklbw %%xmm6,%%xmm5 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklwd %%xmm5,%%xmm0 \n"
+ "punpckhwd %%xmm5,%%xmm1 \n"
+ "movdqu %%xmm0,(%0) \n"
+ "movdqu %%xmm1,0x10(%0) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x8,%1 \n"
+ "jg 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
+ : "m"(kARGBToSepiaB), // %2
+ "m"(kARGBToSepiaG), // %3
+ "m"(kARGBToSepiaR) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif // HAS_ARGBSEPIAROW_SSSE3
+
+#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
+// Tranform 8 ARGB pixels (32 bytes) with color matrix.
+// Same as Sepia except matrix is provided.
+void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
+ int width) {
+ asm volatile(
+ "movdqu (%3),%%xmm5 \n"
+ "pshufd $0x00,%%xmm5,%%xmm2 \n"
+ "pshufd $0x55,%%xmm5,%%xmm3 \n"
+ "pshufd $0xaa,%%xmm5,%%xmm4 \n"
+ "pshufd $0xff,%%xmm5,%%xmm5 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm7 \n"
+ "pmaddubsw %%xmm2,%%xmm0 \n"
+ "pmaddubsw %%xmm2,%%xmm7 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "phaddsw %%xmm7,%%xmm0 \n"
+ "phaddsw %%xmm1,%%xmm6 \n"
+ "psraw $0x6,%%xmm0 \n"
+ "psraw $0x6,%%xmm6 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "punpcklbw %%xmm6,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "movdqu 0x10(%0),%%xmm7 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm7 \n"
+ "phaddsw %%xmm7,%%xmm1 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu 0x10(%0),%%xmm7 \n"
+ "pmaddubsw %%xmm5,%%xmm6 \n"
+ "pmaddubsw %%xmm5,%%xmm7 \n"
+ "phaddsw %%xmm7,%%xmm6 \n"
+ "psraw $0x6,%%xmm1 \n"
+ "psraw $0x6,%%xmm6 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "punpcklbw %%xmm6,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm6 \n"
+ "punpcklwd %%xmm1,%%xmm0 \n"
+ "punpckhwd %%xmm1,%%xmm6 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm6,0x10(%1) \n"
+ "lea 0x20(%0),%0 \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(matrix_argb) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
+
+#ifdef HAS_ARGBQUANTIZEROW_SSE2
+// Quantize 4 ARGB pixels (16 bytes).
+void ARGBQuantizeRow_SSE2(uint8_t* dst_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int width) {
+ asm volatile(
+ "movd %2,%%xmm2 \n"
+ "movd %3,%%xmm3 \n"
+ "movd %4,%%xmm4 \n"
+ "pshuflw $0x40,%%xmm2,%%xmm2 \n"
+ "pshufd $0x44,%%xmm2,%%xmm2 \n"
+ "pshuflw $0x40,%%xmm3,%%xmm3 \n"
+ "pshufd $0x44,%%xmm3,%%xmm3 \n"
+ "pshuflw $0x40,%%xmm4,%%xmm4 \n"
+ "pshufd $0x44,%%xmm4,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+ "pcmpeqb %%xmm6,%%xmm6 \n"
+ "pslld $0x18,%%xmm6 \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "punpckhbw %%xmm5,%%xmm1 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "pmullw %%xmm3,%%xmm0 \n"
+ "movdqu (%0),%%xmm7 \n"
+ "pmullw %%xmm3,%%xmm1 \n"
+ "pand %%xmm6,%%xmm7 \n"
+ "paddw %%xmm4,%%xmm0 \n"
+ "paddw %%xmm4,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "por %%xmm7,%%xmm0 \n"
+ "movdqu %%xmm0,(%0) \n"
+ "lea 0x10(%0),%0 \n"
+ "sub $0x4,%1 \n"
+ "jg 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
+ : "r"(scale), // %2
+ "r"(interval_size), // %3
+ "r"(interval_offset) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ARGBQUANTIZEROW_SSE2
+
+#ifdef HAS_ARGBSHADEROW_SSE2
+// Shade 4 pixels at a time by specified value.
+void ARGBShadeRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value) {
+ asm volatile(
+ "movd %3,%%xmm2 \n"
+ "punpcklbw %%xmm2,%%xmm2 \n"
+ "punpcklqdq %%xmm2,%%xmm2 \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm1 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(value) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_ARGBSHADEROW_SSE2
+
+#ifdef HAS_ARGBMULTIPLYROW_SSE2
+// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
+void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+
+ "pxor %%xmm5,%%xmm5 \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqu (%1),%%xmm2 \n"
+ "lea 0x10(%1),%1 \n"
+ "movdqu %%xmm0,%%xmm1 \n"
+ "movdqu %%xmm2,%%xmm3 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "punpckhbw %%xmm5,%%xmm3 \n"
+ "pmulhuw %%xmm2,%%xmm0 \n"
+ "pmulhuw %%xmm3,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+#endif // HAS_ARGBMULTIPLYROW_SSE2
+
+#ifdef HAS_ARGBMULTIPLYROW_AVX2
+// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
+void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "vmovdqu (%1),%%ymm3 \n"
+ "lea 0x20(%1),%1 \n"
+ "vpunpcklbw %%ymm1,%%ymm1,%%ymm0 \n"
+ "vpunpckhbw %%ymm1,%%ymm1,%%ymm1 \n"
+ "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
+ "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc"
+#if defined(__AVX2__)
+ ,
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+ );
+}
+#endif // HAS_ARGBMULTIPLYROW_AVX2
+
+#ifdef HAS_ARGBADDROW_SSE2
+// Add 2 rows of ARGB pixels together, 4 pixels at a time.
+void ARGBAddRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "lea 0x10(%1),%1 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1");
+}
+#endif // HAS_ARGBADDROW_SSE2
+
+#ifdef HAS_ARGBADDROW_AVX2
+// Add 2 rows of ARGB pixels together, 4 pixels at a time.
+void ARGBAddRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "lea 0x20(%0),%0 \n"
+ "vpaddusb (%1),%%ymm0,%%ymm0 \n"
+ "lea 0x20(%1),%1 \n"
+ "vmovdqu %%ymm0,(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0");
+}
+#endif // HAS_ARGBADDROW_AVX2
+
+#ifdef HAS_ARGBSUBTRACTROW_SSE2
+// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
+void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqu (%1),%%xmm1 \n"
+ "lea 0x10(%1),%1 \n"
+ "psubusb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1");
+}
+#endif // HAS_ARGBSUBTRACTROW_SSE2
+
+#ifdef HAS_ARGBSUBTRACTROW_AVX2
+// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
+void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "lea 0x20(%0),%0 \n"
+ "vpsubusb (%1),%%ymm0,%%ymm0 \n"
+ "lea 0x20(%1),%1 \n"
+ "vmovdqu %%ymm0,(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0");
+}
+#endif // HAS_ARGBSUBTRACTROW_AVX2
+
+#ifdef HAS_SOBELXROW_SSE2
+// SobelX as a matrix is
+// -1 0 1
+// -2 0 2
+// -1 0 1
+void SobelXRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int width) {
+ asm volatile(
+ "sub %0,%1 \n"
+ "sub %0,%2 \n"
+ "sub %0,%3 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "movq 0x2(%0),%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "psubw %%xmm1,%%xmm0 \n"
+ "movq 0x00(%0,%1,1),%%xmm1 \n"
+ "movq 0x02(%0,%1,1),%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "psubw %%xmm2,%%xmm1 \n"
+ "movq 0x00(%0,%2,1),%%xmm2 \n"
+ "movq 0x02(%0,%2,1),%%xmm3 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm3 \n"
+ "psubw %%xmm3,%%xmm2 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm1,%%xmm0 \n"
+ "pxor %%xmm1,%%xmm1 \n"
+ "psubw %%xmm0,%%xmm1 \n"
+ "pmaxsw %%xmm1,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,0x00(%0,%3,1) \n"
+ "lea 0x8(%0),%0 \n"
+ "sub $0x8,%4 \n"
+ "jg 1b \n"
+ : "+r"(src_y0), // %0
+ "+r"(src_y1), // %1
+ "+r"(src_y2), // %2
+ "+r"(dst_sobelx), // %3
+ "+r"(width) // %4
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+#endif // HAS_SOBELXROW_SSE2
+
+#ifdef HAS_SOBELYROW_SSE2
+// SobelY as a matrix is
+// -1 -2 -1
+// 0 0 0
+// 1 2 1
+void SobelYRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int width) {
+ asm volatile(
+ "sub %0,%1 \n"
+ "sub %0,%2 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "movq 0x00(%0,%1,1),%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "psubw %%xmm1,%%xmm0 \n"
+ "movq 0x1(%0),%%xmm1 \n"
+ "movq 0x01(%0,%1,1),%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "psubw %%xmm2,%%xmm1 \n"
+ "movq 0x2(%0),%%xmm2 \n"
+ "movq 0x02(%0,%1,1),%%xmm3 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm3 \n"
+ "psubw %%xmm3,%%xmm2 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm1,%%xmm0 \n"
+ "pxor %%xmm1,%%xmm1 \n"
+ "psubw %%xmm0,%%xmm1 \n"
+ "pmaxsw %%xmm1,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,0x00(%0,%2,1) \n"
+ "lea 0x8(%0),%0 \n"
+ "sub $0x8,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_y0), // %0
+ "+r"(src_y1), // %1
+ "+r"(dst_sobely), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+#endif // HAS_SOBELYROW_SSE2
+
+#ifdef HAS_SOBELROW_SSE2
+// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
+// A = 255
+// R = Sobel
+// G = Sobel
+// B = Sobel
+void SobelRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "sub %0,%1 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0x18,%%xmm5 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "punpcklbw %%xmm0,%%xmm2 \n"
+ "punpckhbw %%xmm0,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm1 \n"
+ "punpcklwd %%xmm2,%%xmm1 \n"
+ "punpckhwd %%xmm2,%%xmm2 \n"
+ "por %%xmm5,%%xmm1 \n"
+ "por %%xmm5,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm3 \n"
+ "punpcklwd %%xmm0,%%xmm3 \n"
+ "punpckhwd %%xmm0,%%xmm0 \n"
+ "por %%xmm5,%%xmm3 \n"
+ "por %%xmm5,%%xmm0 \n"
+ "movdqu %%xmm1,(%2) \n"
+ "movdqu %%xmm2,0x10(%2) \n"
+ "movdqu %%xmm3,0x20(%2) \n"
+ "movdqu %%xmm0,0x30(%2) \n"
+ "lea 0x40(%2),%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+#endif // HAS_SOBELROW_SSE2
+
+#ifdef HAS_SOBELTOPLANEROW_SSE2
+// Adds Sobel X and Sobel Y and stores Sobel into a plane.
+void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
+ int width) {
+ asm volatile(
+ "sub %0,%1 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0x18,%%xmm5 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_y), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1");
+}
+#endif // HAS_SOBELTOPLANEROW_SSE2
+
+#ifdef HAS_SOBELXYROW_SSE2
+// Mixes Sobel X, Sobel Y and Sobel into ARGB.
+// A = 255
+// R = Sobel X
+// G = Sobel
+// B = Sobel Y
+void SobelXYRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "sub %0,%1 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+
+ // 8 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%1,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "paddusb %%xmm1,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm3 \n"
+ "punpcklbw %%xmm5,%%xmm3 \n"
+ "punpckhbw %%xmm5,%%xmm0 \n"
+ "movdqa %%xmm1,%%xmm4 \n"
+ "punpcklbw %%xmm2,%%xmm4 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "movdqa %%xmm4,%%xmm6 \n"
+ "punpcklwd %%xmm3,%%xmm6 \n"
+ "punpckhwd %%xmm3,%%xmm4 \n"
+ "movdqa %%xmm1,%%xmm7 \n"
+ "punpcklwd %%xmm0,%%xmm7 \n"
+ "punpckhwd %%xmm0,%%xmm1 \n"
+ "movdqu %%xmm6,(%2) \n"
+ "movdqu %%xmm4,0x10(%2) \n"
+ "movdqu %%xmm7,0x20(%2) \n"
+ "movdqu %%xmm1,0x30(%2) \n"
+ "lea 0x40(%2),%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_SOBELXYROW_SSE2
+
+#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
+// Creates a table of cumulative sums where each value is a sum of all values
+// above and to the left of the value, inclusive of the value.
+void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
+ int32_t* cumsum,
+ const int32_t* previous_cumsum,
+ int width) {
+ asm volatile(
+ "pxor %%xmm0,%%xmm0 \n"
+ "pxor %%xmm1,%%xmm1 \n"
+ "sub $0x4,%3 \n"
+ "jl 49f \n"
+ "test $0xf,%1 \n"
+ "jne 49f \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "40: \n"
+ "movdqu (%0),%%xmm2 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm2,%%xmm4 \n"
+ "punpcklbw %%xmm1,%%xmm2 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "punpcklwd %%xmm1,%%xmm2 \n"
+ "punpckhwd %%xmm1,%%xmm3 \n"
+ "punpckhbw %%xmm1,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "punpcklwd %%xmm1,%%xmm4 \n"
+ "punpckhwd %%xmm1,%%xmm5 \n"
+ "paddd %%xmm2,%%xmm0 \n"
+ "movdqu (%2),%%xmm2 \n"
+ "paddd %%xmm0,%%xmm2 \n"
+ "paddd %%xmm3,%%xmm0 \n"
+ "movdqu 0x10(%2),%%xmm3 \n"
+ "paddd %%xmm0,%%xmm3 \n"
+ "paddd %%xmm4,%%xmm0 \n"
+ "movdqu 0x20(%2),%%xmm4 \n"
+ "paddd %%xmm0,%%xmm4 \n"
+ "paddd %%xmm5,%%xmm0 \n"
+ "movdqu 0x30(%2),%%xmm5 \n"
+ "lea 0x40(%2),%2 \n"
+ "paddd %%xmm0,%%xmm5 \n"
+ "movdqu %%xmm2,(%1) \n"
+ "movdqu %%xmm3,0x10(%1) \n"
+ "movdqu %%xmm4,0x20(%1) \n"
+ "movdqu %%xmm5,0x30(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x4,%3 \n"
+ "jge 40b \n"
+
+ "49: \n"
+ "add $0x3,%3 \n"
+ "jl 19f \n"
+
+ // 1 pixel loop.
+ LABELALIGN
+ "10: \n"
+ "movd (%0),%%xmm2 \n"
+ "lea 0x4(%0),%0 \n"
+ "punpcklbw %%xmm1,%%xmm2 \n"
+ "punpcklwd %%xmm1,%%xmm2 \n"
+ "paddd %%xmm2,%%xmm0 \n"
+ "movdqu (%2),%%xmm2 \n"
+ "lea 0x10(%2),%2 \n"
+ "paddd %%xmm0,%%xmm2 \n"
+ "movdqu %%xmm2,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x1,%3 \n"
+ "jge 10b \n"
+
+ "19: \n"
+ : "+r"(row), // %0
+ "+r"(cumsum), // %1
+ "+r"(previous_cumsum), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
+
+#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
+ const int32_t* botleft,
+ int width,
+ int area,
+ uint8_t* dst,
+ int count) {
+ asm volatile(
+ "movd %5,%%xmm5 \n"
+ "cvtdq2ps %%xmm5,%%xmm5 \n"
+ "rcpss %%xmm5,%%xmm4 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+ "sub $0x4,%3 \n"
+ "jl 49f \n"
+ "cmpl $0x80,%5 \n"
+ "ja 40f \n"
+
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "pcmpeqb %%xmm6,%%xmm6 \n"
+ "psrld $0x10,%%xmm6 \n"
+ "cvtdq2ps %%xmm6,%%xmm6 \n"
+ "addps %%xmm6,%%xmm5 \n"
+ "mulps %%xmm4,%%xmm5 \n"
+ "cvtps2dq %%xmm5,%%xmm5 \n"
+ "packssdw %%xmm5,%%xmm5 \n"
+
+ // 4 pixel small loop.
+ LABELALIGN
+ "4: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "psubd 0x00(%0,%4,4),%%xmm0 \n"
+ "psubd 0x10(%0,%4,4),%%xmm1 \n"
+ "psubd 0x20(%0,%4,4),%%xmm2 \n"
+ "psubd 0x30(%0,%4,4),%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "psubd (%1),%%xmm0 \n"
+ "psubd 0x10(%1),%%xmm1 \n"
+ "psubd 0x20(%1),%%xmm2 \n"
+ "psubd 0x30(%1),%%xmm3 \n"
+ "paddd 0x00(%1,%4,4),%%xmm0 \n"
+ "paddd 0x10(%1,%4,4),%%xmm1 \n"
+ "paddd 0x20(%1,%4,4),%%xmm2 \n"
+ "paddd 0x30(%1,%4,4),%%xmm3 \n"
+ "lea 0x40(%1),%1 \n"
+ "packssdw %%xmm1,%%xmm0 \n"
+ "packssdw %%xmm3,%%xmm2 \n"
+ "pmulhuw %%xmm5,%%xmm0 \n"
+ "pmulhuw %%xmm5,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jge 4b \n"
+ "jmp 49f \n"
+
+ // 4 pixel loop
+ LABELALIGN
+ "40: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm3 \n"
+ "psubd 0x00(%0,%4,4),%%xmm0 \n"
+ "psubd 0x10(%0,%4,4),%%xmm1 \n"
+ "psubd 0x20(%0,%4,4),%%xmm2 \n"
+ "psubd 0x30(%0,%4,4),%%xmm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "psubd (%1),%%xmm0 \n"
+ "psubd 0x10(%1),%%xmm1 \n"
+ "psubd 0x20(%1),%%xmm2 \n"
+ "psubd 0x30(%1),%%xmm3 \n"
+ "paddd 0x00(%1,%4,4),%%xmm0 \n"
+ "paddd 0x10(%1,%4,4),%%xmm1 \n"
+ "paddd 0x20(%1,%4,4),%%xmm2 \n"
+ "paddd 0x30(%1,%4,4),%%xmm3 \n"
+ "lea 0x40(%1),%1 \n"
+ "cvtdq2ps %%xmm0,%%xmm0 \n"
+ "cvtdq2ps %%xmm1,%%xmm1 \n"
+ "mulps %%xmm4,%%xmm0 \n"
+ "mulps %%xmm4,%%xmm1 \n"
+ "cvtdq2ps %%xmm2,%%xmm2 \n"
+ "cvtdq2ps %%xmm3,%%xmm3 \n"
+ "mulps %%xmm4,%%xmm2 \n"
+ "mulps %%xmm4,%%xmm3 \n"
+ "cvtps2dq %%xmm0,%%xmm0 \n"
+ "cvtps2dq %%xmm1,%%xmm1 \n"
+ "cvtps2dq %%xmm2,%%xmm2 \n"
+ "cvtps2dq %%xmm3,%%xmm3 \n"
+ "packssdw %%xmm1,%%xmm0 \n"
+ "packssdw %%xmm3,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jge 40b \n"
+
+ "49: \n"
+ "add $0x3,%3 \n"
+ "jl 19f \n"
+
+ // 1 pixel loop
+ LABELALIGN
+ "10: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "psubd 0x00(%0,%4,4),%%xmm0 \n"
+ "lea 0x10(%0),%0 \n"
+ "psubd (%1),%%xmm0 \n"
+ "paddd 0x00(%1,%4,4),%%xmm0 \n"
+ "lea 0x10(%1),%1 \n"
+ "cvtdq2ps %%xmm0,%%xmm0 \n"
+ "mulps %%xmm4,%%xmm0 \n"
+ "cvtps2dq %%xmm0,%%xmm0 \n"
+ "packssdw %%xmm0,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movd %%xmm0,(%2) \n"
+ "lea 0x4(%2),%2 \n"
+ "sub $0x1,%3 \n"
+ "jge 10b \n"
+ "19: \n"
+ : "+r"(topleft), // %0
+ "+r"(botleft), // %1
+ "+r"(dst), // %2
+ "+rm"(count) // %3
+ : "r"((intptr_t)(width)), // %4
+ "rm"(area) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+
+#ifdef HAS_ARGBAFFINEROW_SSE2
+// Copy ARGB pixels from source image with slope to a row of destination.
+LIBYUV_API
+void ARGBAffineRow_SSE2(const uint8_t* src_argb,
+ int src_argb_stride,
+ uint8_t* dst_argb,
+ const float* src_dudv,
+ int width) {
+ intptr_t src_argb_stride_temp = src_argb_stride;
+ intptr_t temp;
+ asm volatile(
+ "movq (%3),%%xmm2 \n"
+ "movq 0x08(%3),%%xmm7 \n"
+ "shl $0x10,%1 \n"
+ "add $0x4,%1 \n"
+ "movd %1,%%xmm5 \n"
+ "sub $0x4,%4 \n"
+ "jl 49f \n"
+
+ "pshufd $0x44,%%xmm7,%%xmm7 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "movdqa %%xmm2,%%xmm0 \n"
+ "addps %%xmm7,%%xmm0 \n"
+ "movlhps %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm7,%%xmm4 \n"
+ "addps %%xmm4,%%xmm4 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "addps %%xmm4,%%xmm3 \n"
+ "addps %%xmm4,%%xmm4 \n"
+
+ // 4 pixel loop
+ LABELALIGN
+ "40: \n"
+ "cvttps2dq %%xmm2,%%xmm0 \n" // x,y float->int first 2
+ "cvttps2dq %%xmm3,%%xmm1 \n" // x,y float->int next 2
+ "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts
+ "pmaddwd %%xmm5,%%xmm0 \n" // off = x*4 + y*stride
+ "movd %%xmm0,%k1 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+ "movd %%xmm0,%k5 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+ "movd 0x00(%0,%1,1),%%xmm1 \n"
+ "movd 0x00(%0,%5,1),%%xmm6 \n"
+ "punpckldq %%xmm6,%%xmm1 \n"
+ "addps %%xmm4,%%xmm2 \n"
+ "movq %%xmm1,(%2) \n"
+ "movd %%xmm0,%k1 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+ "movd %%xmm0,%k5 \n"
+ "movd 0x00(%0,%1,1),%%xmm0 \n"
+ "movd 0x00(%0,%5,1),%%xmm6 \n"
+ "punpckldq %%xmm6,%%xmm0 \n"
+ "addps %%xmm4,%%xmm3 \n"
+ "movq %%xmm0,0x08(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%4 \n"
+ "jge 40b \n"
+
+ "49: \n"
+ "add $0x3,%4 \n"
+ "jl 19f \n"
+
+ // 1 pixel loop
+ LABELALIGN
+ "10: \n"
+ "cvttps2dq %%xmm2,%%xmm0 \n"
+ "packssdw %%xmm0,%%xmm0 \n"
+ "pmaddwd %%xmm5,%%xmm0 \n"
+ "addps %%xmm7,%%xmm2 \n"
+ "movd %%xmm0,%k1 \n"
+ "movd 0x00(%0,%1,1),%%xmm0 \n"
+ "movd %%xmm0,(%2) \n"
+ "lea 0x04(%2),%2 \n"
+ "sub $0x1,%4 \n"
+ "jge 10b \n"
+ "19: \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_argb_stride_temp), // %1
+ "+r"(dst_argb), // %2
+ "+r"(src_dudv), // %3
+ "+rm"(width), // %4
+ "=&r"(temp) // %5
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ARGBAFFINEROW_SSE2
+
+#ifdef HAS_INTERPOLATEROW_SSSE3
+// Bilinear filter 16x2 -> 16x1
+void InterpolateRow_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ asm volatile(
+ "sub %1,%0 \n"
+ "cmp $0x0,%3 \n"
+ "je 100f \n"
+ "cmp $0x80,%3 \n"
+ "je 50f \n"
+
+ "movd %3,%%xmm0 \n"
+ "neg %3 \n"
+ "add $0x100,%3 \n"
+ "movd %3,%%xmm5 \n"
+ "punpcklbw %%xmm0,%%xmm5 \n"
+ "punpcklwd %%xmm5,%%xmm5 \n"
+ "pshufd $0x0,%%xmm5,%%xmm5 \n"
+ "mov $0x80808080,%%eax \n"
+ "movd %%eax,%%xmm4 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+
+ // General purpose row blend.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%1),%%xmm0 \n"
+ "movdqu 0x00(%1,%4,1),%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm2,%%xmm0 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "psubb %%xmm4,%%xmm0 \n"
+ "psubb %%xmm4,%%xmm1 \n"
+ "movdqa %%xmm5,%%xmm2 \n"
+ "movdqa %%xmm5,%%xmm3 \n"
+ "pmaddubsw %%xmm0,%%xmm2 \n"
+ "pmaddubsw %%xmm1,%%xmm3 \n"
+ "paddw %%xmm4,%%xmm2 \n"
+ "paddw %%xmm4,%%xmm3 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "psrlw $0x8,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"
+ "movdqu %%xmm2,0x00(%1,%0,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "jmp 99f \n"
+
+ // Blend 50 / 50.
+ LABELALIGN
+ "50: \n"
+ "movdqu (%1),%%xmm0 \n"
+ "movdqu 0x00(%1,%4,1),%%xmm1 \n"
+ "pavgb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,0x00(%1,%0,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 50b \n"
+ "jmp 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ LABELALIGN
+ "100: \n"
+ "movdqu (%1),%%xmm0 \n"
+ "movdqu %%xmm0,0x00(%1,%0,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 100b \n"
+
+ "99: \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+rm"(dst_width), // %2
+ "+r"(source_y_fraction) // %3
+ : "r"((intptr_t)(src_stride)) // %4
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_INTERPOLATEROW_SSSE3
+
+#ifdef HAS_INTERPOLATEROW_AVX2
+// Bilinear filter 32x2 -> 32x1
+void InterpolateRow_AVX2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ asm volatile(
+ "cmp $0x0,%3 \n"
+ "je 100f \n"
+ "sub %1,%0 \n"
+ "cmp $0x80,%3 \n"
+ "je 50f \n"
+
+ "vmovd %3,%%xmm0 \n"
+ "neg %3 \n"
+ "add $0x100,%3 \n"
+ "vmovd %3,%%xmm5 \n"
+ "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n"
+ "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n"
+ "vbroadcastss %%xmm5,%%ymm5 \n"
+ "mov $0x80808080,%%eax \n"
+ "vmovd %%eax,%%xmm4 \n"
+ "vbroadcastss %%xmm4,%%ymm4 \n"
+
+ // General purpose row blend.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%1),%%ymm0 \n"
+ "vmovdqu 0x00(%1,%4,1),%%ymm2 \n"
+ "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n"
+ "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpsubb %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpsubb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n"
+ "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n"
+ "vpaddw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpaddw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,0x00(%1,%0,1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "jmp 99f \n"
+
+ // Blend 50 / 50.
+ LABELALIGN
+ "50: \n"
+ "vmovdqu (%1),%%ymm0 \n"
+ "vpavgb 0x00(%1,%4,1),%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,0x00(%1,%0,1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 50b \n"
+ "jmp 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ LABELALIGN
+ "100: \n"
+ "rep movsb \n"
+ "jmp 999f \n"
+
+ "99: \n"
+ "vzeroupper \n"
+ "999: \n"
+ : "+D"(dst_ptr), // %0
+ "+S"(src_ptr), // %1
+ "+cm"(dst_width), // %2
+ "+r"(source_y_fraction) // %3
+ : "r"((intptr_t)(src_stride)) // %4
+ : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm4", "xmm5");
+}
+#endif // HAS_INTERPOLATEROW_AVX2
+
+#ifdef HAS_ARGBSHUFFLEROW_SSSE3
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width) {
+ asm volatile(
+
+ "movdqu (%3),%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pshufb %%xmm5,%%xmm0 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(shuffler) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+#endif // HAS_ARGBSHUFFLEROW_SSSE3
+
+#ifdef HAS_ARGBSHUFFLEROW_AVX2
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width) {
+ asm volatile(
+
+ "vbroadcastf128 (%3),%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(shuffler) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm5");
+}
+#endif // HAS_ARGBSHUFFLEROW_AVX2
+
+#ifdef HAS_I422TOYUY2ROW_SSE2
+void I422ToYUY2Row_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movq (%1),%%xmm2 \n"
+ "movq 0x00(%1,%2,1),%%xmm1 \n"
+ "add $0x8,%1 \n"
+ "punpcklbw %%xmm1,%%xmm2 \n"
+ "movdqu (%0),%%xmm0 \n"
+ "add $0x10,%0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm2,%%xmm0 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "movdqu %%xmm0,(%3) \n"
+ "movdqu %%xmm1,0x10(%3) \n"
+ "lea 0x20(%3),%3 \n"
+ "sub $0x10,%4 \n"
+ "jg 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_yuy2), // %3
+ "+rm"(width) // %4
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_I422TOYUY2ROW_SSE2
+
+#ifdef HAS_I422TOUYVYROW_SSE2
+void I422ToUYVYRow_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width) {
+ asm volatile(
+
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movq (%1),%%xmm2 \n"
+ "movq 0x00(%1,%2,1),%%xmm1 \n"
+ "add $0x8,%1 \n"
+ "punpcklbw %%xmm1,%%xmm2 \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm1 \n"
+ "add $0x10,%0 \n"
+ "punpcklbw %%xmm0,%%xmm1 \n"
+ "punpckhbw %%xmm0,%%xmm2 \n"
+ "movdqu %%xmm1,(%3) \n"
+ "movdqu %%xmm2,0x10(%3) \n"
+ "lea 0x20(%3),%3 \n"
+ "sub $0x10,%4 \n"
+ "jg 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_uyvy), // %3
+ "+rm"(width) // %4
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_I422TOUYVYROW_SSE2
+
+#ifdef HAS_I422TOYUY2ROW_AVX2
+void I422ToYUY2Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vpmovzxbw (%1),%%ymm1 \n"
+ "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
+ "add $0x10,%1 \n"
+ "vpsllw $0x8,%%ymm2,%%ymm2 \n"
+ "vpor %%ymm1,%%ymm2,%%ymm2 \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "add $0x20,%0 \n"
+ "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n"
+ "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n"
+ "vextractf128 $0x0,%%ymm1,(%3) \n"
+ "vextractf128 $0x0,%%ymm2,0x10(%3) \n"
+ "vextractf128 $0x1,%%ymm1,0x20(%3) \n"
+ "vextractf128 $0x1,%%ymm2,0x30(%3) \n"
+ "lea 0x40(%3),%3 \n"
+ "sub $0x20,%4 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_yuy2), // %3
+ "+rm"(width) // %4
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_I422TOYUY2ROW_AVX2
+
+#ifdef HAS_I422TOUYVYROW_AVX2
+void I422ToUYVYRow_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width) {
+ asm volatile(
+
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vpmovzxbw (%1),%%ymm1 \n"
+ "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
+ "add $0x10,%1 \n"
+ "vpsllw $0x8,%%ymm2,%%ymm2 \n"
+ "vpor %%ymm1,%%ymm2,%%ymm2 \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "add $0x20,%0 \n"
+ "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n"
+ "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n"
+ "vextractf128 $0x0,%%ymm1,(%3) \n"
+ "vextractf128 $0x0,%%ymm2,0x10(%3) \n"
+ "vextractf128 $0x1,%%ymm1,0x20(%3) \n"
+ "vextractf128 $0x1,%%ymm2,0x30(%3) \n"
+ "lea 0x40(%3),%3 \n"
+ "sub $0x20,%4 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_uyvy), // %3
+ "+rm"(width) // %4
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_I422TOUYVYROW_AVX2
+
+#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
+void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const float* poly,
+ int width) {
+ asm volatile(
+
+ "pxor %%xmm3,%%xmm3 \n"
+
+ // 2 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "lea 0x8(%0),%0 \n"
+ "punpcklbw %%xmm3,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm4 \n"
+ "punpcklwd %%xmm3,%%xmm0 \n"
+ "punpckhwd %%xmm3,%%xmm4 \n"
+ "cvtdq2ps %%xmm0,%%xmm0 \n"
+ "cvtdq2ps %%xmm4,%%xmm4 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "mulps 0x10(%3),%%xmm0 \n"
+ "mulps 0x10(%3),%%xmm4 \n"
+ "addps (%3),%%xmm0 \n"
+ "addps (%3),%%xmm4 \n"
+ "movdqa %%xmm1,%%xmm2 \n"
+ "movdqa %%xmm5,%%xmm6 \n"
+ "mulps %%xmm1,%%xmm2 \n"
+ "mulps %%xmm5,%%xmm6 \n"
+ "mulps %%xmm2,%%xmm1 \n"
+ "mulps %%xmm6,%%xmm5 \n"
+ "mulps 0x20(%3),%%xmm2 \n"
+ "mulps 0x20(%3),%%xmm6 \n"
+ "mulps 0x30(%3),%%xmm1 \n"
+ "mulps 0x30(%3),%%xmm5 \n"
+ "addps %%xmm2,%%xmm0 \n"
+ "addps %%xmm6,%%xmm4 \n"
+ "addps %%xmm1,%%xmm0 \n"
+ "addps %%xmm5,%%xmm4 \n"
+ "cvttps2dq %%xmm0,%%xmm0 \n"
+ "cvttps2dq %%xmm4,%%xmm4 \n"
+ "packuswb %%xmm4,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x2,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(poly) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif // HAS_ARGBPOLYNOMIALROW_SSE2
+
+#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
+void ARGBPolynomialRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const float* poly,
+ int width) {
+ asm volatile(
+ "vbroadcastf128 (%3),%%ymm4 \n"
+ "vbroadcastf128 0x10(%3),%%ymm5 \n"
+ "vbroadcastf128 0x20(%3),%%ymm6 \n"
+ "vbroadcastf128 0x30(%3),%%ymm7 \n"
+
+ // 2 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vpmovzxbd (%0),%%ymm0 \n" // 2 ARGB pixels
+ "lea 0x8(%0),%0 \n"
+ "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats
+ "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X
+ "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X
+ "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X
+ "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X
+ "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X *
+ // X
+ "vcvttps2dq %%ymm0,%%ymm0 \n"
+ "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n"
+ "vmovq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x2,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(poly) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ARGBPOLYNOMIALROW_AVX2
+
+#ifdef HAS_HALFFLOATROW_SSE2
+static float kScaleBias = 1.9259299444e-34f;
+void HalfFloatRow_SSE2(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ scale *= kScaleBias;
+ asm volatile(
+ "movd %3,%%xmm4 \n"
+ "pshufd $0x0,%%xmm4,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+ "sub %0,%1 \n"
+
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm2 \n" // 8 shorts
+ "add $0x10,%0 \n"
+ "movdqa %%xmm2,%%xmm3 \n"
+ "punpcklwd %%xmm5,%%xmm2 \n" // 8 ints in xmm2/1
+ "cvtdq2ps %%xmm2,%%xmm2 \n" // 8 floats
+ "punpckhwd %%xmm5,%%xmm3 \n"
+ "cvtdq2ps %%xmm3,%%xmm3 \n"
+ "mulps %%xmm4,%%xmm2 \n"
+ "mulps %%xmm4,%%xmm3 \n"
+ "psrld $0xd,%%xmm2 \n"
+ "psrld $0xd,%%xmm3 \n"
+ "packssdw %%xmm3,%%xmm2 \n"
+ "movdqu %%xmm2,-0x10(%0,%1,1) \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(scale) // %3
+ : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_HALFFLOATROW_SSE2
+
+#ifdef HAS_HALFFLOATROW_AVX2
+void HalfFloatRow_AVX2(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ scale *= kScaleBias;
+ asm volatile(
+ "vbroadcastss %3, %%ymm4 \n"
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+ "sub %0,%1 \n"
+
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm2 \n" // 16 shorts
+ "add $0x20,%0 \n"
+ "vpunpckhwd %%ymm5,%%ymm2,%%ymm3 \n" // mutates
+ "vpunpcklwd %%ymm5,%%ymm2,%%ymm2 \n"
+ "vcvtdq2ps %%ymm3,%%ymm3 \n"
+ "vcvtdq2ps %%ymm2,%%ymm2 \n"
+ "vmulps %%ymm3,%%ymm4,%%ymm3 \n"
+ "vmulps %%ymm2,%%ymm4,%%ymm2 \n"
+ "vpsrld $0xd,%%ymm3,%%ymm3 \n"
+ "vpsrld $0xd,%%ymm2,%%ymm2 \n"
+ "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // unmutates
+ "vmovdqu %%ymm2,-0x20(%0,%1,1) \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+#if defined(__x86_64__)
+ : "x"(scale) // %3
+#else
+ : "m"(scale) // %3
+#endif
+ : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_HALFFLOATROW_AVX2
+
+#ifdef HAS_HALFFLOATROW_F16C
+void HalfFloatRow_F16C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ asm volatile(
+ "vbroadcastss %3, %%ymm4 \n"
+ "sub %0,%1 \n"
+
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vpmovzxwd (%0),%%ymm2 \n" // 16 shorts -> 16 ints
+ "vpmovzxwd 0x10(%0),%%ymm3 \n"
+ "vcvtdq2ps %%ymm2,%%ymm2 \n"
+ "vcvtdq2ps %%ymm3,%%ymm3 \n"
+ "vmulps %%ymm2,%%ymm4,%%ymm2 \n"
+ "vmulps %%ymm3,%%ymm4,%%ymm3 \n"
+ "vcvtps2ph $3, %%ymm2, %%xmm2 \n"
+ "vcvtps2ph $3, %%ymm3, %%xmm3 \n"
+ "vmovdqu %%xmm2,0x00(%0,%1,1) \n"
+ "vmovdqu %%xmm3,0x10(%0,%1,1) \n"
+ "add $0x20,%0 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+#if defined(__x86_64__)
+ : "x"(scale) // %3
+#else
+ : "m"(scale) // %3
+#endif
+ : "memory", "cc", "xmm2", "xmm3", "xmm4");
+}
+#endif // HAS_HALFFLOATROW_F16C
+
+#ifdef HAS_HALFFLOATROW_F16C
+void HalfFloat1Row_F16C(const uint16_t* src, uint16_t* dst, float, int width) {
+ asm volatile(
+ "sub %0,%1 \n"
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "vpmovzxwd (%0),%%ymm2 \n" // 16 shorts -> 16 ints
+ "vpmovzxwd 0x10(%0),%%ymm3 \n"
+ "vcvtdq2ps %%ymm2,%%ymm2 \n"
+ "vcvtdq2ps %%ymm3,%%ymm3 \n"
+ "vcvtps2ph $3, %%ymm2, %%xmm2 \n"
+ "vcvtps2ph $3, %%ymm3, %%xmm3 \n"
+ "vmovdqu %%xmm2,0x00(%0,%1,1) \n"
+ "vmovdqu %%xmm3,0x10(%0,%1,1) \n"
+ "add $0x20,%0 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "memory", "cc", "xmm2", "xmm3");
+}
+#endif // HAS_HALFFLOATROW_F16C
+
+#ifdef HAS_ARGBCOLORTABLEROW_X86
+// Tranform ARGB pixels with color table.
+void ARGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width) {
+ uintptr_t pixel_temp;
+ asm volatile(
+ // 1 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movzb (%0),%1 \n"
+ "lea 0x4(%0),%0 \n"
+ "movzb 0x00(%3,%1,4),%1 \n"
+ "mov %b1,-0x4(%0) \n"
+ "movzb -0x3(%0),%1 \n"
+ "movzb 0x01(%3,%1,4),%1 \n"
+ "mov %b1,-0x3(%0) \n"
+ "movzb -0x2(%0),%1 \n"
+ "movzb 0x02(%3,%1,4),%1 \n"
+ "mov %b1,-0x2(%0) \n"
+ "movzb -0x1(%0),%1 \n"
+ "movzb 0x03(%3,%1,4),%1 \n"
+ "mov %b1,-0x1(%0) \n"
+ "dec %2 \n"
+ "jg 1b \n"
+ : "+r"(dst_argb), // %0
+ "=&d"(pixel_temp), // %1
+ "+r"(width) // %2
+ : "r"(table_argb) // %3
+ : "memory", "cc");
+}
+#endif // HAS_ARGBCOLORTABLEROW_X86
+
+#ifdef HAS_RGBCOLORTABLEROW_X86
+// Tranform RGB pixels with color table.
+void RGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width) {
+ uintptr_t pixel_temp;
+ asm volatile(
+ // 1 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movzb (%0),%1 \n"
+ "lea 0x4(%0),%0 \n"
+ "movzb 0x00(%3,%1,4),%1 \n"
+ "mov %b1,-0x4(%0) \n"
+ "movzb -0x3(%0),%1 \n"
+ "movzb 0x01(%3,%1,4),%1 \n"
+ "mov %b1,-0x3(%0) \n"
+ "movzb -0x2(%0),%1 \n"
+ "movzb 0x02(%3,%1,4),%1 \n"
+ "mov %b1,-0x2(%0) \n"
+ "dec %2 \n"
+ "jg 1b \n"
+ : "+r"(dst_argb), // %0
+ "=&d"(pixel_temp), // %1
+ "+r"(width) // %2
+ : "r"(table_argb) // %3
+ : "memory", "cc");
+}
+#endif // HAS_RGBCOLORTABLEROW_X86
+
+#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
+// Tranform RGB pixels with luma table.
+void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ const uint8_t* luma,
+ uint32_t lumacoeff) {
+ uintptr_t pixel_temp;
+ uintptr_t table_temp;
+ asm volatile(
+ "movd %6,%%xmm3 \n"
+ "pshufd $0x0,%%xmm3,%%xmm3 \n"
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psllw $0x8,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+
+ // 4 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%2),%%xmm0 \n"
+ "pmaddubsw %%xmm3,%%xmm0 \n"
+ "phaddw %%xmm0,%%xmm0 \n"
+ "pand %%xmm4,%%xmm0 \n"
+ "punpcklwd %%xmm5,%%xmm0 \n"
+ "movd %%xmm0,%k1 \n" // 32 bit offset
+ "add %5,%1 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+
+ "movzb (%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,(%3) \n"
+ "movzb 0x1(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x1(%3) \n"
+ "movzb 0x2(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x2(%3) \n"
+ "movzb 0x3(%2),%0 \n"
+ "mov %b0,0x3(%3) \n"
+
+ "movd %%xmm0,%k1 \n" // 32 bit offset
+ "add %5,%1 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+
+ "movzb 0x4(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x4(%3) \n"
+ "movzb 0x5(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x5(%3) \n"
+ "movzb 0x6(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x6(%3) \n"
+ "movzb 0x7(%2),%0 \n"
+ "mov %b0,0x7(%3) \n"
+
+ "movd %%xmm0,%k1 \n" // 32 bit offset
+ "add %5,%1 \n"
+ "pshufd $0x39,%%xmm0,%%xmm0 \n"
+
+ "movzb 0x8(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x8(%3) \n"
+ "movzb 0x9(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0x9(%3) \n"
+ "movzb 0xa(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0xa(%3) \n"
+ "movzb 0xb(%2),%0 \n"
+ "mov %b0,0xb(%3) \n"
+
+ "movd %%xmm0,%k1 \n" // 32 bit offset
+ "add %5,%1 \n"
+
+ "movzb 0xc(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0xc(%3) \n"
+ "movzb 0xd(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0xd(%3) \n"
+ "movzb 0xe(%2),%0 \n"
+ "movzb 0x00(%1,%0,1),%0 \n"
+ "mov %b0,0xe(%3) \n"
+ "movzb 0xf(%2),%0 \n"
+ "mov %b0,0xf(%3) \n"
+ "lea 0x10(%2),%2 \n"
+ "lea 0x10(%3),%3 \n"
+ "sub $0x4,%4 \n"
+ "jg 1b \n"
+ : "=&d"(pixel_temp), // %0
+ "=&a"(table_temp), // %1
+ "+r"(src_argb), // %2
+ "+r"(dst_argb), // %3
+ "+rm"(width) // %4
+ : "r"(luma), // %5
+ "rm"(lumacoeff) // %6
+ : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
+
+#endif // defined(__x86_64__) || defined(__i386__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_msa.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_msa.cc
new file mode 100644
index 0000000000..4fb2631f0b
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/row_msa.cc
@@ -0,0 +1,3512 @@
+/*
+ * Copyright 2016 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+
+#include "libyuv/row.h"
+
+// This module is for GCC MSA
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#include "libyuv/macros_msa.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#define ALPHA_VAL (-1)
+
+// Fill YUV -> RGB conversion constants into vectors
+#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, bb, bg, br, yg) \
+ { \
+ ub = __msa_fill_w(yuvconst->kUVToB[0]); \
+ vr = __msa_fill_w(yuvconst->kUVToR[1]); \
+ ug = __msa_fill_w(yuvconst->kUVToG[0]); \
+ vg = __msa_fill_w(yuvconst->kUVToG[1]); \
+ bb = __msa_fill_w(yuvconst->kUVBiasB[0]); \
+ bg = __msa_fill_w(yuvconst->kUVBiasG[0]); \
+ br = __msa_fill_w(yuvconst->kUVBiasR[0]); \
+ yg = __msa_fill_w(yuvconst->kYToRgb[0]); \
+ }
+
+// Load YUV 422 pixel data
+#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \
+ { \
+ uint64_t y_m; \
+ uint32_t u_m, v_m; \
+ v4i32 zero_m = {0}; \
+ y_m = LD(psrc_y); \
+ u_m = LW(psrc_u); \
+ v_m = LW(psrc_v); \
+ out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64_t)y_m); \
+ out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)u_m); \
+ out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)v_m); \
+ }
+
+// Clip input vector elements between 0 to 255
+#define CLIP_0TO255(in0, in1, in2, in3, in4, in5) \
+ { \
+ v4i32 max_m = __msa_ldi_w(0xFF); \
+ \
+ in0 = __msa_maxi_s_w(in0, 0); \
+ in1 = __msa_maxi_s_w(in1, 0); \
+ in2 = __msa_maxi_s_w(in2, 0); \
+ in3 = __msa_maxi_s_w(in3, 0); \
+ in4 = __msa_maxi_s_w(in4, 0); \
+ in5 = __msa_maxi_s_w(in5, 0); \
+ in0 = __msa_min_s_w(max_m, in0); \
+ in1 = __msa_min_s_w(max_m, in1); \
+ in2 = __msa_min_s_w(max_m, in2); \
+ in3 = __msa_min_s_w(max_m, in3); \
+ in4 = __msa_min_s_w(max_m, in4); \
+ in5 = __msa_min_s_w(max_m, in5); \
+ }
+
+// Convert 8 pixels of YUV 420 to RGB.
+#define YUVTORGB(in_y, in_uv, ubvr, ugvg, bb, bg, br, yg, out_b, out_g, out_r) \
+ { \
+ v8i16 vec0_m, vec1_m; \
+ v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m; \
+ v4i32 reg5_m, reg6_m, reg7_m; \
+ v16i8 zero_m = {0}; \
+ \
+ vec0_m = (v8i16)__msa_ilvr_b((v16i8)in_y, (v16i8)in_y); \
+ vec1_m = (v8i16)__msa_ilvr_b((v16i8)zero_m, (v16i8)in_uv); \
+ reg0_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec0_m); \
+ reg1_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec0_m); \
+ reg2_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec1_m); \
+ reg3_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec1_m); \
+ reg0_m *= yg; \
+ reg1_m *= yg; \
+ reg2_m *= ubvr; \
+ reg3_m *= ubvr; \
+ reg0_m = __msa_srai_w(reg0_m, 16); \
+ reg1_m = __msa_srai_w(reg1_m, 16); \
+ reg4_m = __msa_dotp_s_w((v8i16)vec1_m, (v8i16)ugvg); \
+ reg5_m = __msa_ilvev_w(reg2_m, reg2_m); \
+ reg6_m = __msa_ilvev_w(reg3_m, reg3_m); \
+ reg7_m = __msa_ilvr_w(reg4_m, reg4_m); \
+ reg2_m = __msa_ilvod_w(reg2_m, reg2_m); \
+ reg3_m = __msa_ilvod_w(reg3_m, reg3_m); \
+ reg4_m = __msa_ilvl_w(reg4_m, reg4_m); \
+ reg5_m = reg0_m - reg5_m; \
+ reg6_m = reg1_m - reg6_m; \
+ reg2_m = reg0_m - reg2_m; \
+ reg3_m = reg1_m - reg3_m; \
+ reg7_m = reg0_m - reg7_m; \
+ reg4_m = reg1_m - reg4_m; \
+ reg5_m += bb; \
+ reg6_m += bb; \
+ reg7_m += bg; \
+ reg4_m += bg; \
+ reg2_m += br; \
+ reg3_m += br; \
+ reg5_m = __msa_srai_w(reg5_m, 6); \
+ reg6_m = __msa_srai_w(reg6_m, 6); \
+ reg7_m = __msa_srai_w(reg7_m, 6); \
+ reg4_m = __msa_srai_w(reg4_m, 6); \
+ reg2_m = __msa_srai_w(reg2_m, 6); \
+ reg3_m = __msa_srai_w(reg3_m, 6); \
+ CLIP_0TO255(reg5_m, reg6_m, reg7_m, reg4_m, reg2_m, reg3_m); \
+ out_b = __msa_pckev_h((v8i16)reg6_m, (v8i16)reg5_m); \
+ out_g = __msa_pckev_h((v8i16)reg4_m, (v8i16)reg7_m); \
+ out_r = __msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m); \
+ }
+
+// Pack and Store 8 ARGB values.
+#define STOREARGB(in0, in1, in2, in3, pdst_argb) \
+ { \
+ v8i16 vec0_m, vec1_m; \
+ v16u8 dst0_m, dst1_m; \
+ vec0_m = (v8i16)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \
+ vec1_m = (v8i16)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \
+ dst0_m = (v16u8)__msa_ilvr_h(vec1_m, vec0_m); \
+ dst1_m = (v16u8)__msa_ilvl_h(vec1_m, vec0_m); \
+ ST_UB2(dst0_m, dst1_m, pdst_argb, 16); \
+ }
+
+// Takes ARGB input and calculates Y.
+#define ARGBTOY(argb0, argb1, argb2, argb3, const0, const1, const2, shift, \
+ y_out) \
+ { \
+ v16u8 vec0_m, vec1_m, vec2_m, vec3_m; \
+ v8u16 reg0_m, reg1_m; \
+ \
+ vec0_m = (v16u8)__msa_pckev_h((v8i16)argb1, (v8i16)argb0); \
+ vec1_m = (v16u8)__msa_pckev_h((v8i16)argb3, (v8i16)argb2); \
+ vec2_m = (v16u8)__msa_pckod_h((v8i16)argb1, (v8i16)argb0); \
+ vec3_m = (v16u8)__msa_pckod_h((v8i16)argb3, (v8i16)argb2); \
+ reg0_m = __msa_dotp_u_h(vec0_m, const0); \
+ reg1_m = __msa_dotp_u_h(vec1_m, const0); \
+ reg0_m = __msa_dpadd_u_h(reg0_m, vec2_m, const1); \
+ reg1_m = __msa_dpadd_u_h(reg1_m, vec3_m, const1); \
+ reg0_m += const2; \
+ reg1_m += const2; \
+ reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, shift); \
+ reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, shift); \
+ y_out = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
+ }
+
+// Loads current and next row of ARGB input and averages it to calculate U and V
+#define READ_ARGB(s_ptr, t_ptr, argb0, argb1, argb2, argb3) \
+ { \
+ v16u8 src0_m, src1_m, src2_m, src3_m, src4_m, src5_m, src6_m, src7_m; \
+ v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
+ v16u8 vec8_m, vec9_m; \
+ v8u16 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m, reg5_m, reg6_m, reg7_m; \
+ v8u16 reg8_m, reg9_m; \
+ \
+ src0_m = (v16u8)__msa_ld_b((v16i8*)s, 0); \
+ src1_m = (v16u8)__msa_ld_b((v16i8*)s, 16); \
+ src2_m = (v16u8)__msa_ld_b((v16i8*)s, 32); \
+ src3_m = (v16u8)__msa_ld_b((v16i8*)s, 48); \
+ src4_m = (v16u8)__msa_ld_b((v16i8*)t, 0); \
+ src5_m = (v16u8)__msa_ld_b((v16i8*)t, 16); \
+ src6_m = (v16u8)__msa_ld_b((v16i8*)t, 32); \
+ src7_m = (v16u8)__msa_ld_b((v16i8*)t, 48); \
+ vec0_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \
+ vec1_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \
+ vec2_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \
+ vec3_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \
+ vec4_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \
+ vec5_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \
+ vec6_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \
+ vec7_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \
+ reg0_m = __msa_hadd_u_h(vec0_m, vec0_m); \
+ reg1_m = __msa_hadd_u_h(vec1_m, vec1_m); \
+ reg2_m = __msa_hadd_u_h(vec2_m, vec2_m); \
+ reg3_m = __msa_hadd_u_h(vec3_m, vec3_m); \
+ reg4_m = __msa_hadd_u_h(vec4_m, vec4_m); \
+ reg5_m = __msa_hadd_u_h(vec5_m, vec5_m); \
+ reg6_m = __msa_hadd_u_h(vec6_m, vec6_m); \
+ reg7_m = __msa_hadd_u_h(vec7_m, vec7_m); \
+ reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \
+ reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \
+ reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \
+ reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \
+ reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \
+ reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \
+ reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \
+ reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \
+ reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \
+ reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \
+ reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \
+ reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \
+ argb0 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \
+ argb1 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
+ src0_m = (v16u8)__msa_ld_b((v16i8*)s, 64); \
+ src1_m = (v16u8)__msa_ld_b((v16i8*)s, 80); \
+ src2_m = (v16u8)__msa_ld_b((v16i8*)s, 96); \
+ src3_m = (v16u8)__msa_ld_b((v16i8*)s, 112); \
+ src4_m = (v16u8)__msa_ld_b((v16i8*)t, 64); \
+ src5_m = (v16u8)__msa_ld_b((v16i8*)t, 80); \
+ src6_m = (v16u8)__msa_ld_b((v16i8*)t, 96); \
+ src7_m = (v16u8)__msa_ld_b((v16i8*)t, 112); \
+ vec2_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \
+ vec3_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \
+ vec4_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \
+ vec5_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \
+ vec6_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \
+ vec7_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \
+ vec8_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \
+ vec9_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \
+ reg0_m = __msa_hadd_u_h(vec2_m, vec2_m); \
+ reg1_m = __msa_hadd_u_h(vec3_m, vec3_m); \
+ reg2_m = __msa_hadd_u_h(vec4_m, vec4_m); \
+ reg3_m = __msa_hadd_u_h(vec5_m, vec5_m); \
+ reg4_m = __msa_hadd_u_h(vec6_m, vec6_m); \
+ reg5_m = __msa_hadd_u_h(vec7_m, vec7_m); \
+ reg6_m = __msa_hadd_u_h(vec8_m, vec8_m); \
+ reg7_m = __msa_hadd_u_h(vec9_m, vec9_m); \
+ reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \
+ reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \
+ reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \
+ reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \
+ reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \
+ reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \
+ reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \
+ reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \
+ reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \
+ reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \
+ reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \
+ reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \
+ argb2 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \
+ argb3 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
+ }
+
+// Takes ARGB input and calculates U and V.
+#define ARGBTOUV(argb0, argb1, argb2, argb3, const0, const1, const2, const3, \
+ shf0, shf1, shf2, shf3, v_out, u_out) \
+ { \
+ v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
+ v8u16 reg0_m, reg1_m, reg2_m, reg3_m; \
+ \
+ vec0_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb1, (v16i8)argb0); \
+ vec1_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb3, (v16i8)argb2); \
+ vec2_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb1, (v16i8)argb0); \
+ vec3_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb3, (v16i8)argb2); \
+ vec4_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb1, (v16i8)argb0); \
+ vec5_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb3, (v16i8)argb2); \
+ vec6_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb1, (v16i8)argb0); \
+ vec7_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb3, (v16i8)argb2); \
+ reg0_m = __msa_dotp_u_h(vec0_m, const1); \
+ reg1_m = __msa_dotp_u_h(vec1_m, const1); \
+ reg2_m = __msa_dotp_u_h(vec4_m, const1); \
+ reg3_m = __msa_dotp_u_h(vec5_m, const1); \
+ reg0_m += const3; \
+ reg1_m += const3; \
+ reg2_m += const3; \
+ reg3_m += const3; \
+ reg0_m -= __msa_dotp_u_h(vec2_m, const0); \
+ reg1_m -= __msa_dotp_u_h(vec3_m, const0); \
+ reg2_m -= __msa_dotp_u_h(vec6_m, const2); \
+ reg3_m -= __msa_dotp_u_h(vec7_m, const2); \
+ v_out = (v16u8)__msa_pckod_b((v16i8)reg1_m, (v16i8)reg0_m); \
+ u_out = (v16u8)__msa_pckod_b((v16i8)reg3_m, (v16i8)reg2_m); \
+ }
+
+// Load I444 pixel data
+#define READI444(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \
+ { \
+ uint64_t y_m, u_m, v_m; \
+ v2i64 zero_m = {0}; \
+ y_m = LD(psrc_y); \
+ u_m = LD(psrc_u); \
+ v_m = LD(psrc_v); \
+ out_y = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)y_m); \
+ out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)u_m); \
+ out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)v_m); \
+ }
+
+void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3;
+ v16u8 dst0, dst1, dst2, dst3;
+ v16i8 shuffler = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+ src += width - 64;
+
+ for (x = 0; x < width; x += 64) {
+ LD_UB4(src, 16, src3, src2, src1, src0);
+ VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2);
+ VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0);
+ ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
+ dst += 64;
+ src -= 64;
+ }
+}
+
+void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3;
+ v16u8 dst0, dst1, dst2, dst3;
+ v16i8 shuffler = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3};
+ src += width * 4 - 64;
+
+ for (x = 0; x < width; x += 16) {
+ LD_UB4(src, 16, src3, src2, src1, src0);
+ VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2);
+ VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0);
+ ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
+ dst += 64;
+ src -= 64;
+ }
+}
+
+void I422ToYUY2Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width) {
+ int x;
+ v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1;
+ v16u8 dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3;
+
+ for (x = 0; x < width; x += 32) {
+ src_u0 = LD_UB(src_u);
+ src_v0 = LD_UB(src_v);
+ LD_UB2(src_y, 16, src_y0, src_y1);
+ ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1);
+ ILVRL_B2_UB(vec_uv0, src_y0, dst_yuy2_0, dst_yuy2_1);
+ ILVRL_B2_UB(vec_uv1, src_y1, dst_yuy2_2, dst_yuy2_3);
+ ST_UB4(dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3, dst_yuy2, 16);
+ src_u += 16;
+ src_v += 16;
+ src_y += 32;
+ dst_yuy2 += 64;
+ }
+}
+
+void I422ToUYVYRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width) {
+ int x;
+ v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1;
+ v16u8 dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3;
+
+ for (x = 0; x < width; x += 32) {
+ src_u0 = LD_UB(src_u);
+ src_v0 = LD_UB(src_v);
+ LD_UB2(src_y, 16, src_y0, src_y1);
+ ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1);
+ ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1);
+ ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3);
+ ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16);
+ src_u += 16;
+ src_v += 16;
+ src_y += 32;
+ dst_uyvy += 64;
+ }
+}
+
+void I422ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2;
+ v8i16 vec0, vec1, vec2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ READYUV422(src_y, src_u, src_v, src0, src1, src2);
+ src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+ YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ STOREARGB(vec0, vec1, vec2, alpha, dst_argb);
+ src_y += 8;
+ src_u += 4;
+ src_v += 4;
+ dst_argb += 32;
+ }
+}
+
+void I422ToRGBARow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2;
+ v8i16 vec0, vec1, vec2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ READYUV422(src_y, src_u, src_v, src0, src1, src2);
+ src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+ YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ STOREARGB(alpha, vec0, vec1, vec2, dst_argb);
+ src_y += 8;
+ src_u += 4;
+ src_v += 4;
+ dst_argb += 32;
+ }
+}
+
+void I422AlphaToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int64_t data_a;
+ v16u8 src0, src1, src2, src3;
+ v8i16 vec0, vec1, vec2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v4i32 zero = {0};
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ data_a = LD(src_a);
+ READYUV422(src_y, src_u, src_v, src0, src1, src2);
+ src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+ src3 = (v16u8)__msa_insert_d((v2i64)zero, 0, data_a);
+ YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3);
+ STOREARGB(vec0, vec1, vec2, src3, dst_argb);
+ src_y += 8;
+ src_u += 4;
+ src_v += 4;
+ src_a += 8;
+ dst_argb += 32;
+ }
+}
+
+void I422ToRGB24Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int32_t width) {
+ int x;
+ int64_t data_u, data_v;
+ v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v16u8 reg0, reg1, reg2, reg3;
+ v2i64 zero = {0};
+ v16i8 shuffler0 = {0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10};
+ v16i8 shuffler1 = {0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10};
+ v16i8 shuffler2 = {26, 6, 7, 27, 8, 9, 28, 10,
+ 11, 29, 12, 13, 30, 14, 15, 31};
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((v16u8*)src_y, 0);
+ data_u = LD(src_u);
+ data_v = LD(src_v);
+ src1 = (v16u8)__msa_insert_d(zero, 0, data_u);
+ src2 = (v16u8)__msa_insert_d(zero, 0, data_v);
+ src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+ src3 = (v16u8)__msa_sldi_b((v16i8)src0, (v16i8)src0, 8);
+ src4 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src1, 8);
+ YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ YUVTORGB(src3, src4, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec3, vec4, vec5);
+ reg0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);
+ reg2 = (v16u8)__msa_ilvev_b((v16i8)vec4, (v16i8)vec3);
+ reg3 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec2);
+ reg1 = (v16u8)__msa_sldi_b((v16i8)reg2, (v16i8)reg0, 11);
+ dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)reg3, (v16i8)reg0);
+ dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)reg3, (v16i8)reg1);
+ dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)reg3, (v16i8)reg2);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ ST_UB(dst2, (dst_argb + 32));
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ dst_argb += 48;
+ }
+}
+
+// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R.
+void I422ToRGB565Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, dst0;
+ v8i16 vec0, vec1, vec2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ READYUV422(src_y, src_u, src_v, src0, src1, src2);
+ src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+ YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec2, vec1);
+ vec0 = __msa_srai_h(vec0, 3);
+ vec1 = __msa_srai_h(vec1, 3);
+ vec2 = __msa_srai_h(vec2, 2);
+ vec1 = __msa_slli_h(vec1, 11);
+ vec2 = __msa_slli_h(vec2, 5);
+ vec0 |= vec1;
+ dst0 = (v16u8)(vec2 | vec0);
+ ST_UB(dst0, dst_rgb565);
+ src_y += 8;
+ src_u += 4;
+ src_v += 4;
+ dst_rgb565 += 16;
+ }
+}
+
+// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G.
+void I422ToARGB4444Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, dst0;
+ v8i16 vec0, vec1, vec2;
+ v8u16 reg0, reg1, reg2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v8u16 const_0xF000 = (v8u16)__msa_fill_h(0xF000);
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ READYUV422(src_y, src_u, src_v, src0, src1, src2);
+ src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+ YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ reg0 = (v8u16)__msa_srai_h(vec0, 4);
+ reg1 = (v8u16)__msa_srai_h(vec1, 4);
+ reg2 = (v8u16)__msa_srai_h(vec2, 4);
+ reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 4);
+ reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 8);
+ reg1 |= const_0xF000;
+ reg0 |= reg2;
+ dst0 = (v16u8)(reg1 | reg0);
+ ST_UB(dst0, dst_argb4444);
+ src_y += 8;
+ src_u += 4;
+ src_v += 4;
+ dst_argb4444 += 16;
+ }
+}
+
+void I422ToARGB1555Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, dst0;
+ v8i16 vec0, vec1, vec2;
+ v8u16 reg0, reg1, reg2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v8u16 const_0x8000 = (v8u16)__msa_fill_h(0x8000);
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ READYUV422(src_y, src_u, src_v, src0, src1, src2);
+ src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+ YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ reg0 = (v8u16)__msa_srai_h(vec0, 3);
+ reg1 = (v8u16)__msa_srai_h(vec1, 3);
+ reg2 = (v8u16)__msa_srai_h(vec2, 3);
+ reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 5);
+ reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 10);
+ reg1 |= const_0x8000;
+ reg0 |= reg2;
+ dst0 = (v16u8)(reg1 | reg0);
+ ST_UB(dst0, dst_argb1555);
+ src_y += 8;
+ src_u += 4;
+ src_v += 4;
+ dst_argb1555 += 16;
+ }
+}
+
+void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1;
+
+ for (x = 0; x < width; x += 32) {
+ LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+ ST_UB2(dst0, dst1, dst_y, 16);
+ src_yuy2 += 64;
+ dst_y += 32;
+ }
+}
+
+void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_yuy2_next = src_yuy2 + src_stride_yuy2;
+ int x;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16u8 vec0, vec1, dst0, dst1;
+
+ for (x = 0; x < width; x += 32) {
+ LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
+ LD_UB4(src_yuy2_next, 16, src4, src5, src6, src7);
+ src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ src2 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4);
+ src3 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6);
+ vec0 = __msa_aver_u_b(src0, src2);
+ vec1 = __msa_aver_u_b(src1, src3);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_u);
+ ST_UB(dst1, dst_v);
+ src_yuy2 += 64;
+ src_yuy2_next += 64;
+ dst_u += 16;
+ dst_v += 16;
+ }
+}
+
+void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1;
+
+ for (x = 0; x < width; x += 32) {
+ LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
+ src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ ST_UB(dst0, dst_u);
+ ST_UB(dst1, dst_v);
+ src_yuy2 += 64;
+ dst_u += 16;
+ dst_v += 16;
+ }
+}
+
+void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1;
+
+ for (x = 0; x < width; x += 32) {
+ LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
+ dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ ST_UB2(dst0, dst1, dst_y, 16);
+ src_uyvy += 64;
+ dst_y += 32;
+ }
+}
+
+void UYVYToUVRow_MSA(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_uyvy_next = src_uyvy + src_stride_uyvy;
+ int x;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16u8 vec0, vec1, dst0, dst1;
+
+ for (x = 0; x < width; x += 32) {
+ LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
+ LD_UB4(src_uyvy_next, 16, src4, src5, src6, src7);
+ src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+ src2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
+ src3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6);
+ vec0 = __msa_aver_u_b(src0, src2);
+ vec1 = __msa_aver_u_b(src1, src3);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_u);
+ ST_UB(dst1, dst_v);
+ src_uyvy += 64;
+ src_uyvy_next += 64;
+ dst_u += 16;
+ dst_v += 16;
+ }
+}
+
+void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1;
+
+ for (x = 0; x < width; x += 32) {
+ LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
+ src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ ST_UB(dst0, dst_u);
+ ST_UB(dst1, dst_v);
+ src_uyvy += 64;
+ dst_u += 16;
+ dst_v += 16;
+ }
+}
+
+void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
+ v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
+ v16i8 zero = {0};
+ v8u16 const_0x19 = (v8u16)__msa_ldi_h(0x19);
+ v8u16 const_0x81 = (v8u16)__msa_ldi_h(0x81);
+ v8u16 const_0x42 = (v8u16)__msa_ldi_h(0x42);
+ v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
+ src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
+ vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+ vec2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ reg0 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec0);
+ reg1 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec1);
+ reg2 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec2);
+ reg3 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec3);
+ reg4 = (v8u16)__msa_ilvod_b(zero, (v16i8)vec0);
+ reg5 = (v8u16)__msa_ilvod_b(zero, (v16i8)vec1);
+ reg0 *= const_0x19;
+ reg1 *= const_0x19;
+ reg2 *= const_0x81;
+ reg3 *= const_0x81;
+ reg4 *= const_0x42;
+ reg5 *= const_0x42;
+ reg0 += reg2;
+ reg1 += reg3;
+ reg0 += reg4;
+ reg1 += reg5;
+ reg0 += const_0x1080;
+ reg1 += const_0x1080;
+ reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8);
+ reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
+ ST_UB(dst0, dst_y);
+ src_argb0 += 64;
+ dst_y += 16;
+ }
+}
+
+void ARGBToUVRow_MSA(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ const uint8_t* src_argb0_next = src_argb0 + src_stride_argb;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+ v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
+ v16u8 dst0, dst1;
+ v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70);
+ v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A);
+ v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26);
+ v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E);
+ v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12);
+ v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
+
+ for (x = 0; x < width; x += 32) {
+ src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
+ src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
+ src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 64);
+ src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 80);
+ src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 96);
+ src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 112);
+ vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+ vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
+ vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6);
+ vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4);
+ vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6);
+ vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+ vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
+ vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
+ vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
+ vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2);
+ reg0 = __msa_hadd_u_h(vec8, vec8);
+ reg1 = __msa_hadd_u_h(vec9, vec9);
+ reg2 = __msa_hadd_u_h(vec4, vec4);
+ reg3 = __msa_hadd_u_h(vec5, vec5);
+ reg4 = __msa_hadd_u_h(vec0, vec0);
+ reg5 = __msa_hadd_u_h(vec1, vec1);
+ src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 0);
+ src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 16);
+ src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 32);
+ src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 48);
+ src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 64);
+ src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 80);
+ src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 96);
+ src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 112);
+ vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+ vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
+ vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6);
+ vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4);
+ vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6);
+ vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+ vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
+ vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
+ vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
+ vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2);
+ reg0 += __msa_hadd_u_h(vec8, vec8);
+ reg1 += __msa_hadd_u_h(vec9, vec9);
+ reg2 += __msa_hadd_u_h(vec4, vec4);
+ reg3 += __msa_hadd_u_h(vec5, vec5);
+ reg4 += __msa_hadd_u_h(vec0, vec0);
+ reg5 += __msa_hadd_u_h(vec1, vec1);
+ reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 2);
+ reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 2);
+ reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 2);
+ reg3 = (v8u16)__msa_srai_h((v8i16)reg3, 2);
+ reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 2);
+ reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 2);
+ reg6 = reg0 * const_0x70;
+ reg7 = reg1 * const_0x70;
+ reg8 = reg2 * const_0x4A;
+ reg9 = reg3 * const_0x4A;
+ reg6 += const_0x8080;
+ reg7 += const_0x8080;
+ reg8 += reg4 * const_0x26;
+ reg9 += reg5 * const_0x26;
+ reg0 *= const_0x12;
+ reg1 *= const_0x12;
+ reg2 *= const_0x5E;
+ reg3 *= const_0x5E;
+ reg4 *= const_0x70;
+ reg5 *= const_0x70;
+ reg2 += reg0;
+ reg3 += reg1;
+ reg4 += const_0x8080;
+ reg5 += const_0x8080;
+ reg6 -= reg8;
+ reg7 -= reg9;
+ reg4 -= reg2;
+ reg5 -= reg3;
+ reg6 = (v8u16)__msa_srai_h((v8i16)reg6, 8);
+ reg7 = (v8u16)__msa_srai_h((v8i16)reg7, 8);
+ reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 8);
+ reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg7, (v16i8)reg6);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
+ ST_UB(dst0, dst_u);
+ ST_UB(dst1, dst_v);
+ src_argb0 += 128;
+ src_argb0_next += 128;
+ dst_u += 16;
+ dst_v += 16;
+ }
+}
+
+void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1, dst2;
+ v16i8 shuffler0 = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20};
+ v16i8 shuffler1 = {5, 6, 8, 9, 10, 12, 13, 14,
+ 16, 17, 18, 20, 21, 22, 24, 25};
+ v16i8 shuffler2 = {10, 12, 13, 14, 16, 17, 18, 20,
+ 21, 22, 24, 25, 26, 28, 29, 30};
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
+ dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1);
+ dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2);
+ ST_UB2(dst0, dst1, dst_rgb, 16);
+ ST_UB(dst2, (dst_rgb + 32));
+ src_argb += 64;
+ dst_rgb += 48;
+ }
+}
+
+void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1, dst2;
+ v16i8 shuffler0 = {2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 18, 17, 16, 22};
+ v16i8 shuffler1 = {5, 4, 10, 9, 8, 14, 13, 12,
+ 18, 17, 16, 22, 21, 20, 26, 25};
+ v16i8 shuffler2 = {8, 14, 13, 12, 18, 17, 16, 22,
+ 21, 20, 26, 25, 24, 30, 29, 28};
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
+ dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1);
+ dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2);
+ ST_UB2(dst0, dst1, dst_rgb, 16);
+ ST_UB(dst2, (dst_rgb + 32));
+ src_argb += 64;
+ dst_rgb += 48;
+ }
+}
+
+void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ v16u8 src0, src1, dst0;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v16i8 zero = {0};
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3);
+ vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3);
+ vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5);
+ vec4 = (v16u8)__msa_srai_b((v16i8)src1, 3);
+ vec5 = (v16u8)__msa_slli_b((v16i8)src1, 3);
+ vec6 = (v16u8)__msa_srai_b((v16i8)src1, 5);
+ vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1);
+ vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1);
+ vec5 = (v16u8)__msa_sldi_b(zero, (v16i8)vec5, 1);
+ vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1);
+ vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 2);
+ vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 2);
+ vec0 = __msa_binsli_b(vec0, vec1, 2);
+ vec1 = __msa_binsli_b(vec2, vec3, 4);
+ vec4 = __msa_binsli_b(vec4, vec5, 2);
+ vec5 = __msa_binsli_b(vec6, vec7, 4);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);
+ vec4 = (v16u8)__msa_ilvev_b((v16i8)vec5, (v16i8)vec4);
+ dst0 = (v16u8)__msa_pckev_h((v8i16)vec4, (v8i16)vec0);
+ ST_UB(dst0, dst_rgb);
+ src_argb += 32;
+ dst_rgb += 16;
+ }
+}
+
+void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ int x;
+ v16u8 src0, src1, dst0;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+ v16i8 zero = {0};
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3);
+ vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2);
+ vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3);
+ vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1);
+ vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1);
+ vec3 = (v16u8)__msa_srai_b((v16i8)src0, 1);
+ vec5 = (v16u8)__msa_srai_b((v16i8)src1, 3);
+ vec6 = (v16u8)__msa_slli_b((v16i8)src1, 2);
+ vec7 = (v16u8)__msa_srai_b((v16i8)vec5, 3);
+ vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1);
+ vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)vec7, 1);
+ vec8 = (v16u8)__msa_srai_b((v16i8)src1, 1);
+ vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)vec3, 2);
+ vec8 = (v16u8)__msa_sldi_b(zero, (v16i8)vec8, 2);
+ vec4 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 3);
+ vec9 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 3);
+ vec0 = __msa_binsli_b(vec0, vec1, 2);
+ vec5 = __msa_binsli_b(vec5, vec6, 2);
+ vec1 = __msa_binsli_b(vec2, vec3, 5);
+ vec6 = __msa_binsli_b(vec7, vec8, 5);
+ vec1 = __msa_binsli_b(vec1, vec4, 0);
+ vec6 = __msa_binsli_b(vec6, vec9, 0);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);
+ vec1 = (v16u8)__msa_ilvev_b((v16i8)vec6, (v16i8)vec5);
+ dst0 = (v16u8)__msa_pckev_h((v8i16)vec1, (v8i16)vec0);
+ ST_UB(dst0, dst_rgb);
+ src_argb += 32;
+ dst_rgb += 16;
+ }
+}
+
+void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ int x;
+ v16u8 src0, src1;
+ v16u8 vec0, vec1;
+ v16u8 dst0;
+ v16i8 zero = {0};
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4);
+ vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4);
+ src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1);
+ src1 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 1);
+ vec0 = __msa_binsli_b(vec0, src0, 3);
+ vec1 = __msa_binsli_b(vec1, src1, 3);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_rgb);
+ src_argb += 32;
+ dst_rgb += 16;
+ }
+}
+
+void ARGBToUV444Row_MSA(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int32_t width) {
+ int32_t x;
+ v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 vec8, vec9, vec10, vec11;
+ v8u16 const_112 = (v8u16)__msa_ldi_h(112);
+ v8u16 const_74 = (v8u16)__msa_ldi_h(74);
+ v8u16 const_38 = (v8u16)__msa_ldi_h(38);
+ v8u16 const_94 = (v8u16)__msa_ldi_h(94);
+ v8u16 const_18 = (v8u16)__msa_ldi_h(18);
+ v8u16 const_32896 = (v8u16)__msa_fill_h(32896);
+ v16i8 zero = {0};
+
+ for (x = width; x > 0; x -= 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
+ reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ reg1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+ reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ reg3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ src0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
+ src1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2);
+ src2 = (v16u8)__msa_pckod_b((v16i8)reg1, (v16i8)reg0);
+ vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0);
+ vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0);
+ vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1);
+ vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1);
+ vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2);
+ vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2);
+ vec10 = vec0 * const_18;
+ vec11 = vec1 * const_18;
+ vec8 = vec2 * const_94;
+ vec9 = vec3 * const_94;
+ vec6 = vec4 * const_112;
+ vec7 = vec5 * const_112;
+ vec0 *= const_112;
+ vec1 *= const_112;
+ vec2 *= const_74;
+ vec3 *= const_74;
+ vec4 *= const_38;
+ vec5 *= const_38;
+ vec8 += vec10;
+ vec9 += vec11;
+ vec6 += const_32896;
+ vec7 += const_32896;
+ vec0 += const_32896;
+ vec1 += const_32896;
+ vec2 += vec4;
+ vec3 += vec5;
+ vec0 -= vec2;
+ vec1 -= vec3;
+ vec6 -= vec8;
+ vec7 -= vec9;
+ vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8);
+ vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
+ vec6 = (v8u16)__msa_srai_h((v8i16)vec6, 8);
+ vec7 = (v8u16)__msa_srai_h((v8i16)vec7, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
+ ST_UB(dst0, dst_u);
+ ST_UB(dst1, dst_v);
+ src_argb += 64;
+ dst_u += 16;
+ dst_v += 16;
+ }
+}
+
+void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v16u8 src0, src1, dst0;
+ v8u16 vec0, vec1, vec2, vec3;
+ v4u32 reg0, reg1, reg2, reg3;
+ v8i16 zero = {0};
+
+ for (x = 0; x < width; x += 4) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
+ vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
+ vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
+ vec2 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1);
+ vec3 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src1);
+ reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0);
+ reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0);
+ reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1);
+ reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1);
+ reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2);
+ reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2);
+ reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3);
+ reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3);
+ reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 16);
+ reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 16);
+ reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 16);
+ reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 16);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_argb);
+ src_argb0 += 16;
+ src_argb1 += 16;
+ dst_argb += 16;
+ }
+}
+
+void ARGBAddRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1;
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16);
+ dst0 = __msa_adds_u_b(src0, src2);
+ dst1 = __msa_adds_u_b(src1, src3);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ src_argb0 += 32;
+ src_argb1 += 32;
+ dst_argb += 32;
+ }
+}
+
+void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1;
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16);
+ dst0 = __msa_subs_u_b(src0, src2);
+ dst1 = __msa_subs_u_b(src1, src3);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ src_argb0 += 32;
+ src_argb1 += 32;
+ dst_argb += 32;
+ }
+}
+
+void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v16u8 src0, src1, dst0, dst1;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+ v4u32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
+ v8i16 zero = {0};
+ v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255};
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
+ vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
+ vec2 = (v8u16)__msa_ilvr_b((v16i8)src1, (v16i8)src1);
+ vec3 = (v8u16)__msa_ilvl_b((v16i8)src1, (v16i8)src1);
+ vec4 = (v8u16)__msa_fill_h(vec0[3]);
+ vec5 = (v8u16)__msa_fill_h(vec0[7]);
+ vec6 = (v8u16)__msa_fill_h(vec1[3]);
+ vec7 = (v8u16)__msa_fill_h(vec1[7]);
+ vec4 = (v8u16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4);
+ vec5 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6);
+ vec6 = (v8u16)__msa_fill_h(vec2[3]);
+ vec7 = (v8u16)__msa_fill_h(vec2[7]);
+ vec8 = (v8u16)__msa_fill_h(vec3[3]);
+ vec9 = (v8u16)__msa_fill_h(vec3[7]);
+ vec6 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6);
+ vec7 = (v8u16)__msa_pckev_d((v2i64)vec9, (v2i64)vec8);
+ reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec4);
+ reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec4);
+ reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec5);
+ reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec5);
+ reg4 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec6);
+ reg5 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec6);
+ reg6 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec7);
+ reg7 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec7);
+ reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec0);
+ reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec0);
+ reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec1);
+ reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec1);
+ reg4 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2);
+ reg5 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2);
+ reg6 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3);
+ reg7 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3);
+ reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24);
+ reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24);
+ reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24);
+ reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24);
+ reg4 = (v4u32)__msa_srai_w((v4i32)reg4, 24);
+ reg5 = (v4u32)__msa_srai_w((v4i32)reg5, 24);
+ reg6 = (v4u32)__msa_srai_w((v4i32)reg6, 24);
+ reg7 = (v4u32)__msa_srai_w((v4i32)reg7, 24);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+ vec2 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4);
+ vec3 = (v8u16)__msa_pckev_h((v8i16)reg7, (v8i16)reg6);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+ dst0 = __msa_bmnz_v(dst0, src0, mask);
+ dst1 = __msa_bmnz_v(dst1, src1, mask);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ src_argb += 32;
+ dst_argb += 32;
+ }
+}
+
+void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ uint32_t dither4,
+ int width) {
+ int x;
+ v16u8 src0, src1, dst0, vec0, vec1;
+ v8i16 vec_d0;
+ v8i16 reg0, reg1, reg2;
+ v16i8 zero = {0};
+ v8i16 max = __msa_ldi_h(0xFF);
+
+ vec_d0 = (v8i16)__msa_fill_w(dither4);
+ vec_d0 = (v8i16)__msa_ilvr_b(zero, (v16i8)vec_d0);
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ reg0 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec0);
+ reg1 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec1);
+ reg2 = (v8i16)__msa_ilvod_b(zero, (v16i8)vec0);
+ reg0 += vec_d0;
+ reg1 += vec_d0;
+ reg2 += vec_d0;
+ reg0 = __msa_maxi_s_h((v8i16)reg0, 0);
+ reg1 = __msa_maxi_s_h((v8i16)reg1, 0);
+ reg2 = __msa_maxi_s_h((v8i16)reg2, 0);
+ reg0 = __msa_min_s_h((v8i16)max, (v8i16)reg0);
+ reg1 = __msa_min_s_h((v8i16)max, (v8i16)reg1);
+ reg2 = __msa_min_s_h((v8i16)max, (v8i16)reg2);
+ reg0 = __msa_srai_h(reg0, 3);
+ reg2 = __msa_srai_h(reg2, 3);
+ reg1 = __msa_srai_h(reg1, 2);
+ reg2 = __msa_slli_h(reg2, 11);
+ reg1 = __msa_slli_h(reg1, 5);
+ reg0 |= reg1;
+ dst0 = (v16u8)(reg0 | reg2);
+ ST_UB(dst0, dst_rgb);
+ src_argb += 32;
+ dst_rgb += 16;
+ }
+}
+
+void ARGBShuffleRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width) {
+ int x;
+ v16u8 src0, src1, dst0, dst1;
+ v16i8 vec0;
+ v16i8 shuffler_vec = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12};
+ int32_t val = LW((int32_t*)shuffler);
+
+ vec0 = (v16i8)__msa_fill_w(val);
+ shuffler_vec += vec0;
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 16);
+ dst0 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src0, (v16i8)src0);
+ dst1 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src1, (v16i8)src1);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ src_argb += 32;
+ dst_argb += 32;
+ }
+}
+
+void ARGBShadeRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value) {
+ int x;
+ v16u8 src0, dst0;
+ v8u16 vec0, vec1;
+ v4u32 reg0, reg1, reg2, reg3, rgba_scale;
+ v8i16 zero = {0};
+
+ rgba_scale[0] = value;
+ rgba_scale = (v4u32)__msa_ilvr_b((v16i8)rgba_scale, (v16i8)rgba_scale);
+ rgba_scale = (v4u32)__msa_ilvr_h(zero, (v8i16)rgba_scale);
+
+ for (x = 0; x < width; x += 4) {
+ src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0);
+ vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
+ vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
+ reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0);
+ reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0);
+ reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1);
+ reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1);
+ reg0 *= rgba_scale;
+ reg1 *= rgba_scale;
+ reg2 *= rgba_scale;
+ reg3 *= rgba_scale;
+ reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24);
+ reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24);
+ reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24);
+ reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_argb);
+ src_argb += 16;
+ dst_argb += 16;
+ }
+}
+
+void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
+ int x;
+ v16u8 src0, src1, vec0, vec1, dst0, dst1;
+ v8u16 reg0;
+ v16u8 const_0x26 = (v16u8)__msa_ldi_h(0x26);
+ v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F);
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 16);
+ vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
+ vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
+ reg0 = __msa_dotp_u_h(vec0, const_0x4B0F);
+ reg0 = __msa_dpadd_u_h(reg0, vec1, const_0x26);
+ reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 7);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)reg0, (v16i8)reg0);
+ vec1 = (v16u8)__msa_ilvod_b((v16i8)vec1, (v16i8)vec0);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)vec1, (v16i8)vec0);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ src_argb += 32;
+ dst_argb += 32;
+ }
+}
+
+void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width) {
+ int x;
+ v16u8 src0, src1, dst0, dst1, vec0, vec1, vec2, vec3, vec4, vec5;
+ v8u16 reg0, reg1, reg2;
+ v16u8 const_0x4411 = (v16u8)__msa_fill_h(0x4411);
+ v16u8 const_0x23 = (v16u8)__msa_ldi_h(0x23);
+ v16u8 const_0x5816 = (v16u8)__msa_fill_h(0x5816);
+ v16u8 const_0x2D = (v16u8)__msa_ldi_h(0x2D);
+ v16u8 const_0x6218 = (v16u8)__msa_fill_h(0x6218);
+ v16u8 const_0x32 = (v16u8)__msa_ldi_h(0x32);
+ v8u16 const_0xFF = (v8u16)__msa_ldi_h(0xFF);
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 0);
+ src1 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 16);
+ vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
+ vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
+ vec3 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec1);
+ reg0 = (v8u16)__msa_dotp_u_h(vec0, const_0x4411);
+ reg1 = (v8u16)__msa_dotp_u_h(vec0, const_0x5816);
+ reg2 = (v8u16)__msa_dotp_u_h(vec0, const_0x6218);
+ reg0 = (v8u16)__msa_dpadd_u_h(reg0, vec1, const_0x23);
+ reg1 = (v8u16)__msa_dpadd_u_h(reg1, vec1, const_0x2D);
+ reg2 = (v8u16)__msa_dpadd_u_h(reg2, vec1, const_0x32);
+ reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 7);
+ reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 7);
+ reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 7);
+ reg1 = (v8u16)__msa_min_u_h((v8u16)reg1, const_0xFF);
+ reg2 = (v8u16)__msa_min_u_h((v8u16)reg2, const_0xFF);
+ vec0 = (v16u8)__msa_pckev_b((v16i8)reg0, (v16i8)reg0);
+ vec1 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg1);
+ vec2 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg2);
+ vec4 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0);
+ vec5 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)vec5, (v16i8)vec4);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)vec5, (v16i8)vec4);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ dst_argb += 32;
+ }
+}
+
+void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v16u8 src0, src1;
+ v8u16 vec0, vec1, vec2, vec3;
+ v16u8 dst0, dst1, dst2, dst3;
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb4444, 0);
+ src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb4444, 16);
+ vec0 = (v8u16)__msa_andi_b(src0, 0x0F);
+ vec1 = (v8u16)__msa_andi_b(src1, 0x0F);
+ vec2 = (v8u16)__msa_andi_b(src0, 0xF0);
+ vec3 = (v8u16)__msa_andi_b(src1, 0xF0);
+ vec0 |= (v8u16)__msa_slli_b((v16i8)vec0, 4);
+ vec1 |= (v8u16)__msa_slli_b((v16i8)vec1, 4);
+ vec2 |= (v8u16)__msa_srli_b((v16i8)vec2, 4);
+ vec3 |= (v8u16)__msa_srli_b((v16i8)vec3, 4);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0);
+ dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1);
+ dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+ src_argb4444 += 32;
+ dst_argb += 64;
+ }
+}
+
+void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v8u16 src0, src1;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5;
+ v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6;
+ v16u8 dst0, dst1, dst2, dst3;
+ v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v8u16)__msa_ld_h((const v8u16*)src_argb1555, 0);
+ src1 = (v8u16)__msa_ld_h((const v8u16*)src_argb1555, 16);
+ vec0 = src0 & const_0x1F;
+ vec1 = src1 & const_0x1F;
+ src0 = (v8u16)__msa_srli_h((v8i16)src0, 5);
+ src1 = (v8u16)__msa_srli_h((v8i16)src1, 5);
+ vec2 = src0 & const_0x1F;
+ vec3 = src1 & const_0x1F;
+ src0 = (v8u16)__msa_srli_h((v8i16)src0, 5);
+ src1 = (v8u16)__msa_srli_h((v8i16)src1, 5);
+ vec4 = src0 & const_0x1F;
+ vec5 = src1 & const_0x1F;
+ src0 = (v8u16)__msa_srli_h((v8i16)src0, 5);
+ src1 = (v8u16)__msa_srli_h((v8i16)src1, 5);
+ reg0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ reg1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+ reg2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
+ reg3 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ reg4 = (v16u8)__msa_slli_b((v16i8)reg0, 3);
+ reg5 = (v16u8)__msa_slli_b((v16i8)reg1, 3);
+ reg6 = (v16u8)__msa_slli_b((v16i8)reg2, 3);
+ reg4 |= (v16u8)__msa_srai_b((v16i8)reg0, 2);
+ reg5 |= (v16u8)__msa_srai_b((v16i8)reg1, 2);
+ reg6 |= (v16u8)__msa_srai_b((v16i8)reg2, 2);
+ reg3 = -reg3;
+ reg0 = (v16u8)__msa_ilvr_b((v16i8)reg6, (v16i8)reg4);
+ reg1 = (v16u8)__msa_ilvl_b((v16i8)reg6, (v16i8)reg4);
+ reg2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg5);
+ reg3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg5);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)reg2, (v16i8)reg0);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)reg2, (v16i8)reg0);
+ dst2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg1);
+ dst3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg1);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+ src_argb1555 += 32;
+ dst_argb += 64;
+ }
+}
+
+void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
+ v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
+ v16u8 res0, res1, res2, res3, dst0, dst1, dst2, dst3;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+ v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
+ v8u16 const_0x7E0 = (v8u16)__msa_fill_h(0x7E0);
+ v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v8u16)__msa_ld_h((const v8u16*)src_rgb565, 0);
+ src1 = (v8u16)__msa_ld_h((const v8u16*)src_rgb565, 16);
+ vec0 = src0 & const_0x1F;
+ vec1 = src0 & const_0x7E0;
+ vec2 = src0 & const_0xF800;
+ vec3 = src1 & const_0x1F;
+ vec4 = src1 & const_0x7E0;
+ vec5 = src1 & const_0xF800;
+ reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3);
+ reg1 = (v8u16)__msa_srli_h((v8i16)vec1, 3);
+ reg2 = (v8u16)__msa_srli_h((v8i16)vec2, 8);
+ reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3);
+ reg4 = (v8u16)__msa_srli_h((v8i16)vec4, 3);
+ reg5 = (v8u16)__msa_srli_h((v8i16)vec5, 8);
+ reg0 |= (v8u16)__msa_srli_h((v8i16)vec0, 2);
+ reg1 |= (v8u16)__msa_srli_h((v8i16)vec1, 9);
+ reg2 |= (v8u16)__msa_srli_h((v8i16)vec2, 13);
+ reg3 |= (v8u16)__msa_srli_h((v8i16)vec3, 2);
+ reg4 |= (v8u16)__msa_srli_h((v8i16)vec4, 9);
+ reg5 |= (v8u16)__msa_srli_h((v8i16)vec5, 13);
+ res0 = (v16u8)__msa_ilvev_b((v16i8)reg2, (v16i8)reg0);
+ res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg1);
+ res2 = (v16u8)__msa_ilvev_b((v16i8)reg5, (v16i8)reg3);
+ res3 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg4);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0);
+ dst2 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res2);
+ dst3 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res2);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+ src_rgb565 += 32;
+ dst_argb += 64;
+ }
+}
+
+void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2;
+ v16u8 vec0, vec1, vec2;
+ v16u8 dst0, dst1, dst2, dst3;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+ v16i8 shuffler = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19};
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 32);
+ vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12);
+ vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8);
+ vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4);
+ dst0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)src0);
+ dst1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec0);
+ dst2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec1);
+ dst3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec2);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+ src_rgb24 += 48;
+ dst_argb += 64;
+ }
+}
+
+void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
+ int x;
+ v16u8 src0, src1, src2;
+ v16u8 vec0, vec1, vec2;
+ v16u8 dst0, dst1, dst2, dst3;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+ v16i8 mask = {2, 1, 0, 16, 5, 4, 3, 17, 8, 7, 6, 18, 11, 10, 9, 19};
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 32);
+ vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12);
+ vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8);
+ vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4);
+ dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)src0);
+ dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec0);
+ dst2 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec1);
+ dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec2);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+ src_raw += 48;
+ dst_argb += 64;
+ }
+}
+
+void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555,
+ uint8_t* dst_y,
+ int width) {
+ int x;
+ v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
+ v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
+ v16u8 dst0;
+ v8u16 const_0x19 = (v8u16)__msa_ldi_h(0x19);
+ v8u16 const_0x81 = (v8u16)__msa_ldi_h(0x81);
+ v8u16 const_0x42 = (v8u16)__msa_ldi_h(0x42);
+ v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
+ v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v8u16)__msa_ld_b((const v8i16*)src_argb1555, 0);
+ src1 = (v8u16)__msa_ld_b((const v8i16*)src_argb1555, 16);
+ vec0 = src0 & const_0x1F;
+ vec1 = src1 & const_0x1F;
+ src0 = (v8u16)__msa_srai_h((v8i16)src0, 5);
+ src1 = (v8u16)__msa_srai_h((v8i16)src1, 5);
+ vec2 = src0 & const_0x1F;
+ vec3 = src1 & const_0x1F;
+ src0 = (v8u16)__msa_srai_h((v8i16)src0, 5);
+ src1 = (v8u16)__msa_srai_h((v8i16)src1, 5);
+ vec4 = src0 & const_0x1F;
+ vec5 = src1 & const_0x1F;
+ reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3);
+ reg1 = (v8u16)__msa_slli_h((v8i16)vec1, 3);
+ reg0 |= (v8u16)__msa_srai_h((v8i16)vec0, 2);
+ reg1 |= (v8u16)__msa_srai_h((v8i16)vec1, 2);
+ reg2 = (v8u16)__msa_slli_h((v8i16)vec2, 3);
+ reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3);
+ reg2 |= (v8u16)__msa_srai_h((v8i16)vec2, 2);
+ reg3 |= (v8u16)__msa_srai_h((v8i16)vec3, 2);
+ reg4 = (v8u16)__msa_slli_h((v8i16)vec4, 3);
+ reg5 = (v8u16)__msa_slli_h((v8i16)vec5, 3);
+ reg4 |= (v8u16)__msa_srai_h((v8i16)vec4, 2);
+ reg5 |= (v8u16)__msa_srai_h((v8i16)vec5, 2);
+ reg0 *= const_0x19;
+ reg1 *= const_0x19;
+ reg2 *= const_0x81;
+ reg3 *= const_0x81;
+ reg4 *= const_0x42;
+ reg5 *= const_0x42;
+ reg0 += reg2;
+ reg1 += reg3;
+ reg0 += reg4;
+ reg1 += reg5;
+ reg0 += const_0x1080;
+ reg1 += const_0x1080;
+ reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8);
+ reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
+ ST_UB(dst0, dst_y);
+ src_argb1555 += 32;
+ dst_y += 16;
+ }
+}
+
+void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
+ int x;
+ v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
+ v4u32 res0, res1, res2, res3;
+ v16u8 dst0;
+ v4u32 const_0x810019 = (v4u32)__msa_fill_w(0x810019);
+ v4u32 const_0x010042 = (v4u32)__msa_fill_w(0x010042);
+ v8i16 const_0x1080 = __msa_fill_h(0x1080);
+ v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
+ v8u16 const_0x7E0 = (v8u16)__msa_fill_h(0x7E0);
+ v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v8u16)__msa_ld_b((const v8i16*)src_rgb565, 0);
+ src1 = (v8u16)__msa_ld_b((const v8i16*)src_rgb565, 16);
+ vec0 = src0 & const_0x1F;
+ vec1 = src0 & const_0x7E0;
+ vec2 = src0 & const_0xF800;
+ vec3 = src1 & const_0x1F;
+ vec4 = src1 & const_0x7E0;
+ vec5 = src1 & const_0xF800;
+ reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3);
+ reg1 = (v8u16)__msa_srli_h((v8i16)vec1, 3);
+ reg2 = (v8u16)__msa_srli_h((v8i16)vec2, 8);
+ reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3);
+ reg4 = (v8u16)__msa_srli_h((v8i16)vec4, 3);
+ reg5 = (v8u16)__msa_srli_h((v8i16)vec5, 8);
+ reg0 |= (v8u16)__msa_srli_h((v8i16)vec0, 2);
+ reg1 |= (v8u16)__msa_srli_h((v8i16)vec1, 9);
+ reg2 |= (v8u16)__msa_srli_h((v8i16)vec2, 13);
+ reg3 |= (v8u16)__msa_srli_h((v8i16)vec3, 2);
+ reg4 |= (v8u16)__msa_srli_h((v8i16)vec4, 9);
+ reg5 |= (v8u16)__msa_srli_h((v8i16)vec5, 13);
+ vec0 = (v8u16)__msa_ilvr_h((v8i16)reg1, (v8i16)reg0);
+ vec1 = (v8u16)__msa_ilvl_h((v8i16)reg1, (v8i16)reg0);
+ vec2 = (v8u16)__msa_ilvr_h((v8i16)reg4, (v8i16)reg3);
+ vec3 = (v8u16)__msa_ilvl_h((v8i16)reg4, (v8i16)reg3);
+ vec4 = (v8u16)__msa_ilvr_h(const_0x1080, (v8i16)reg2);
+ vec5 = (v8u16)__msa_ilvl_h(const_0x1080, (v8i16)reg2);
+ vec6 = (v8u16)__msa_ilvr_h(const_0x1080, (v8i16)reg5);
+ vec7 = (v8u16)__msa_ilvl_h(const_0x1080, (v8i16)reg5);
+ res0 = __msa_dotp_u_w(vec0, (v8u16)const_0x810019);
+ res1 = __msa_dotp_u_w(vec1, (v8u16)const_0x810019);
+ res2 = __msa_dotp_u_w(vec2, (v8u16)const_0x810019);
+ res3 = __msa_dotp_u_w(vec3, (v8u16)const_0x810019);
+ res0 = __msa_dpadd_u_w(res0, vec4, (v8u16)const_0x010042);
+ res1 = __msa_dpadd_u_w(res1, vec5, (v8u16)const_0x010042);
+ res2 = __msa_dpadd_u_w(res2, vec6, (v8u16)const_0x010042);
+ res3 = __msa_dpadd_u_w(res3, vec7, (v8u16)const_0x010042);
+ res0 = (v4u32)__msa_srai_w((v4i32)res0, 8);
+ res1 = (v4u32)__msa_srai_w((v4i32)res1, 8);
+ res2 = (v4u32)__msa_srai_w((v4i32)res2, 8);
+ res3 = (v4u32)__msa_srai_w((v4i32)res3, 8);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)res1, (v8i16)res0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)res3, (v8i16)res2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_y);
+ src_rgb565 += 32;
+ dst_y += 16;
+ }
+}
+
+void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+ int x;
+ v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
+ v8u16 vec0, vec1, vec2, vec3;
+ v8u16 const_0x8119 = (v8u16)__msa_fill_h(0x8119);
+ v8u16 const_0x42 = (v8u16)__msa_fill_h(0x42);
+ v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
+ v16i8 mask0 = {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 12};
+ v16i8 mask1 = {12, 13, 14, 15, 15, 16, 17, 18,
+ 18, 19, 20, 21, 21, 22, 23, 24};
+ v16i8 mask2 = {8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20};
+ v16i8 mask3 = {4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15, 16};
+ v16i8 zero = {0};
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
+ reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
+ reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
+ reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
+ reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+ vec2 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0);
+ vec3 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2);
+ vec0 = __msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8119);
+ vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8119);
+ vec0 = __msa_dpadd_u_h(vec0, (v16u8)vec2, (v16u8)const_0x42);
+ vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x42);
+ vec0 += const_0x1080;
+ vec1 += const_0x1080;
+ vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8);
+ vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_y);
+ src_argb0 += 48;
+ dst_y += 16;
+ }
+}
+
+void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+ int x;
+ v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0;
+ v8u16 vec0, vec1, vec2, vec3;
+ v8u16 const_0x8142 = (v8u16)__msa_fill_h(0x8142);
+ v8u16 const_0x19 = (v8u16)__msa_fill_h(0x19);
+ v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
+ v16i8 mask0 = {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 12};
+ v16i8 mask1 = {12, 13, 14, 15, 15, 16, 17, 18,
+ 18, 19, 20, 21, 21, 22, 23, 24};
+ v16i8 mask2 = {8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20};
+ v16i8 mask3 = {4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15, 16};
+ v16i8 zero = {0};
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
+ reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
+ reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
+ reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
+ reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+ vec2 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0);
+ vec3 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2);
+ vec0 = __msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8142);
+ vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8142);
+ vec0 = __msa_dpadd_u_h(vec0, (v16u8)vec2, (v16u8)const_0x19);
+ vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x19);
+ vec0 += const_0x1080;
+ vec1 += const_0x1080;
+ vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8);
+ vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_y);
+ src_argb0 += 48;
+ dst_y += 16;
+ }
+}
+
+void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ const uint16_t* s = (const uint16_t*)src_argb1555;
+ const uint16_t* t = (const uint16_t*)(src_argb1555 + src_stride_argb1555);
+ int64_t res0, res1;
+ v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6;
+ v16u8 dst0;
+ v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70);
+ v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A);
+ v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26);
+ v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E);
+ v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12);
+ v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
+ v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v8u16)__msa_ld_b((v8i16*)s, 0);
+ src1 = (v8u16)__msa_ld_b((v8i16*)s, 16);
+ src2 = (v8u16)__msa_ld_b((v8i16*)t, 0);
+ src3 = (v8u16)__msa_ld_b((v8i16*)t, 16);
+ vec0 = src0 & const_0x1F;
+ vec1 = src1 & const_0x1F;
+ vec0 += src2 & const_0x1F;
+ vec1 += src3 & const_0x1F;
+ vec0 = (v8u16)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ src0 = (v8u16)__msa_srai_h((v8i16)src0, 5);
+ src1 = (v8u16)__msa_srai_h((v8i16)src1, 5);
+ src2 = (v8u16)__msa_srai_h((v8i16)src2, 5);
+ src3 = (v8u16)__msa_srai_h((v8i16)src3, 5);
+ vec2 = src0 & const_0x1F;
+ vec3 = src1 & const_0x1F;
+ vec2 += src2 & const_0x1F;
+ vec3 += src3 & const_0x1F;
+ vec2 = (v8u16)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+ src0 = (v8u16)__msa_srai_h((v8i16)src0, 5);
+ src1 = (v8u16)__msa_srai_h((v8i16)src1, 5);
+ src2 = (v8u16)__msa_srai_h((v8i16)src2, 5);
+ src3 = (v8u16)__msa_srai_h((v8i16)src3, 5);
+ vec4 = src0 & const_0x1F;
+ vec5 = src1 & const_0x1F;
+ vec4 += src2 & const_0x1F;
+ vec5 += src3 & const_0x1F;
+ vec4 = (v8u16)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
+ vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0);
+ vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2);
+ vec4 = __msa_hadd_u_h((v16u8)vec4, (v16u8)vec4);
+ vec6 = (v8u16)__msa_slli_h((v8i16)vec0, 1);
+ vec6 |= (v8u16)__msa_srai_h((v8i16)vec0, 6);
+ vec0 = (v8u16)__msa_slli_h((v8i16)vec2, 1);
+ vec0 |= (v8u16)__msa_srai_h((v8i16)vec2, 6);
+ vec2 = (v8u16)__msa_slli_h((v8i16)vec4, 1);
+ vec2 |= (v8u16)__msa_srai_h((v8i16)vec4, 6);
+ reg0 = vec6 * const_0x70;
+ reg1 = vec0 * const_0x4A;
+ reg2 = vec2 * const_0x70;
+ reg3 = vec0 * const_0x5E;
+ reg0 += const_0x8080;
+ reg1 += vec2 * const_0x26;
+ reg2 += const_0x8080;
+ reg3 += vec6 * const_0x12;
+ reg0 -= reg1;
+ reg2 -= reg3;
+ reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8);
+ reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0);
+ res0 = __msa_copy_u_d((v2i64)dst0, 0);
+ res1 = __msa_copy_u_d((v2i64)dst0, 1);
+ SD(res0, dst_u);
+ SD(res1, dst_v);
+ s += 16;
+ t += 16;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void RGB565ToUVRow_MSA(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ const uint16_t* s = (const uint16_t*)src_rgb565;
+ const uint16_t* t = (const uint16_t*)(src_rgb565 + src_stride_rgb565);
+ int64_t res0, res1;
+ v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5;
+ v16u8 dst0;
+ v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70);
+ v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A);
+ v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26);
+ v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E);
+ v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12);
+ v8u16 const_32896 = (v8u16)__msa_fill_h(0x8080);
+ v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
+ v8u16 const_0x3F = (v8u16)__msa_fill_h(0x3F);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v8u16)__msa_ld_b((v8i16*)s, 0);
+ src1 = (v8u16)__msa_ld_b((v8i16*)s, 16);
+ src2 = (v8u16)__msa_ld_b((v8i16*)t, 0);
+ src3 = (v8u16)__msa_ld_b((v8i16*)t, 16);
+ vec0 = src0 & const_0x1F;
+ vec1 = src1 & const_0x1F;
+ vec0 += src2 & const_0x1F;
+ vec1 += src3 & const_0x1F;
+ vec0 = (v8u16)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ src0 = (v8u16)__msa_srai_h((v8i16)src0, 5);
+ src1 = (v8u16)__msa_srai_h((v8i16)src1, 5);
+ src2 = (v8u16)__msa_srai_h((v8i16)src2, 5);
+ src3 = (v8u16)__msa_srai_h((v8i16)src3, 5);
+ vec2 = src0 & const_0x3F;
+ vec3 = src1 & const_0x3F;
+ vec2 += src2 & const_0x3F;
+ vec3 += src3 & const_0x3F;
+ vec1 = (v8u16)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+ src0 = (v8u16)__msa_srai_h((v8i16)src0, 6);
+ src1 = (v8u16)__msa_srai_h((v8i16)src1, 6);
+ src2 = (v8u16)__msa_srai_h((v8i16)src2, 6);
+ src3 = (v8u16)__msa_srai_h((v8i16)src3, 6);
+ vec4 = src0 & const_0x1F;
+ vec5 = src1 & const_0x1F;
+ vec4 += src2 & const_0x1F;
+ vec5 += src3 & const_0x1F;
+ vec2 = (v8u16)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
+ vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0);
+ vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1);
+ vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2);
+ vec3 = (v8u16)__msa_slli_h((v8i16)vec0, 1);
+ vec3 |= (v8u16)__msa_srai_h((v8i16)vec0, 6);
+ vec4 = (v8u16)__msa_slli_h((v8i16)vec2, 1);
+ vec4 |= (v8u16)__msa_srai_h((v8i16)vec2, 6);
+ reg0 = vec3 * const_0x70;
+ reg1 = vec1 * const_0x4A;
+ reg2 = vec4 * const_0x70;
+ reg3 = vec1 * const_0x5E;
+ reg0 += const_32896;
+ reg1 += vec4 * const_0x26;
+ reg2 += const_32896;
+ reg3 += vec3 * const_0x12;
+ reg0 -= reg1;
+ reg2 -= reg3;
+ reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8);
+ reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0);
+ res0 = __msa_copy_u_d((v2i64)dst0, 0);
+ res1 = __msa_copy_u_d((v2i64)dst0, 1);
+ SD(res0, dst_u);
+ SD(res1, dst_v);
+ s += 16;
+ t += 16;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
+ int64_t res0, res1;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8i16 reg0, reg1, reg2, reg3;
+ v16u8 dst0;
+ v8u16 const_0x70 = (v8u16)__msa_fill_h(0x70);
+ v8u16 const_0x4A = (v8u16)__msa_fill_h(0x4A);
+ v8u16 const_0x26 = (v8u16)__msa_fill_h(0x26);
+ v8u16 const_0x5E = (v8u16)__msa_fill_h(0x5E);
+ v8u16 const_0x12 = (v8u16)__msa_fill_h(0x12);
+ v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
+ v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19};
+ v16i8 zero = {0};
+
+ for (x = 0; x < width; x += 16) {
+ inp0 = (v16u8)__msa_ld_b((const v16i8*)s, 0);
+ inp1 = (v16u8)__msa_ld_b((const v16i8*)s, 16);
+ inp2 = (v16u8)__msa_ld_b((const v16i8*)s, 32);
+ inp3 = (v16u8)__msa_ld_b((const v16i8*)t, 0);
+ inp4 = (v16u8)__msa_ld_b((const v16i8*)t, 16);
+ inp5 = (v16u8)__msa_ld_b((const v16i8*)t, 32);
+ src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12);
+ src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12);
+ src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8);
+ src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8);
+ src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4);
+ src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4);
+ src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0);
+ src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1);
+ src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2);
+ src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3);
+ src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3);
+ src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5);
+ src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6);
+ src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7);
+ vec0 = (v8u16)__msa_ilvr_b((v16i8)src4, (v16i8)src0);
+ vec1 = (v8u16)__msa_ilvl_b((v16i8)src4, (v16i8)src0);
+ vec2 = (v8u16)__msa_ilvr_b((v16i8)src5, (v16i8)src1);
+ vec3 = (v8u16)__msa_ilvl_b((v16i8)src5, (v16i8)src1);
+ vec4 = (v8u16)__msa_ilvr_b((v16i8)src6, (v16i8)src2);
+ vec5 = (v8u16)__msa_ilvl_b((v16i8)src6, (v16i8)src2);
+ vec6 = (v8u16)__msa_ilvr_b((v16i8)src7, (v16i8)src3);
+ vec7 = (v8u16)__msa_ilvl_b((v16i8)src7, (v16i8)src3);
+ vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0);
+ vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1);
+ vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2);
+ vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3);
+ vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4);
+ vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5);
+ vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6);
+ vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7);
+ reg0 = (v8i16)__msa_pckev_d((v2i64)vec1, (v2i64)vec0);
+ reg1 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec2);
+ reg2 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4);
+ reg3 = (v8i16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6);
+ reg0 += (v8i16)__msa_pckod_d((v2i64)vec1, (v2i64)vec0);
+ reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2);
+ reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4);
+ reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6);
+ reg0 = __msa_srai_h((v8i16)reg0, 2);
+ reg1 = __msa_srai_h((v8i16)reg1, 2);
+ reg2 = __msa_srai_h((v8i16)reg2, 2);
+ reg3 = __msa_srai_h((v8i16)reg3, 2);
+ vec4 = (v8u16)__msa_pckev_h(reg1, reg0);
+ vec5 = (v8u16)__msa_pckev_h(reg3, reg2);
+ vec6 = (v8u16)__msa_pckod_h(reg1, reg0);
+ vec7 = (v8u16)__msa_pckod_h(reg3, reg2);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6);
+ vec2 = (v8u16)__msa_pckod_h((v8i16)vec5, (v8i16)vec4);
+ vec3 = vec0 * const_0x70;
+ vec4 = vec1 * const_0x4A;
+ vec5 = vec2 * const_0x26;
+ vec2 *= const_0x70;
+ vec1 *= const_0x5E;
+ vec0 *= const_0x12;
+ reg0 = __msa_subv_h((v8i16)vec3, (v8i16)vec4);
+ reg1 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec5);
+ reg2 = __msa_subv_h((v8i16)vec2, (v8i16)vec1);
+ reg3 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec0);
+ reg0 += reg1;
+ reg2 += reg3;
+ reg0 = __msa_srai_h(reg0, 8);
+ reg2 = __msa_srai_h(reg2, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0);
+ res0 = __msa_copy_u_d((v2i64)dst0, 0);
+ res1 = __msa_copy_u_d((v2i64)dst0, 1);
+ SD(res0, dst_u);
+ SD(res1, dst_v);
+ t += 48;
+ s += 48;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void RAWToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
+ int64_t res0, res1;
+ v16u8 inp0, inp1, inp2, inp3, inp4, inp5;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8i16 reg0, reg1, reg2, reg3;
+ v16u8 dst0;
+ v8u16 const_0x70 = (v8u16)__msa_fill_h(0x70);
+ v8u16 const_0x4A = (v8u16)__msa_fill_h(0x4A);
+ v8u16 const_0x26 = (v8u16)__msa_fill_h(0x26);
+ v8u16 const_0x5E = (v8u16)__msa_fill_h(0x5E);
+ v8u16 const_0x12 = (v8u16)__msa_fill_h(0x12);
+ v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
+ v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19};
+ v16i8 zero = {0};
+
+ for (x = 0; x < width; x += 16) {
+ inp0 = (v16u8)__msa_ld_b((const v16i8*)s, 0);
+ inp1 = (v16u8)__msa_ld_b((const v16i8*)s, 16);
+ inp2 = (v16u8)__msa_ld_b((const v16i8*)s, 32);
+ inp3 = (v16u8)__msa_ld_b((const v16i8*)t, 0);
+ inp4 = (v16u8)__msa_ld_b((const v16i8*)t, 16);
+ inp5 = (v16u8)__msa_ld_b((const v16i8*)t, 32);
+ src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12);
+ src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12);
+ src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8);
+ src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8);
+ src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4);
+ src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4);
+ src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0);
+ src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1);
+ src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2);
+ src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3);
+ src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3);
+ src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5);
+ src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6);
+ src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7);
+ vec0 = (v8u16)__msa_ilvr_b((v16i8)src4, (v16i8)src0);
+ vec1 = (v8u16)__msa_ilvl_b((v16i8)src4, (v16i8)src0);
+ vec2 = (v8u16)__msa_ilvr_b((v16i8)src5, (v16i8)src1);
+ vec3 = (v8u16)__msa_ilvl_b((v16i8)src5, (v16i8)src1);
+ vec4 = (v8u16)__msa_ilvr_b((v16i8)src6, (v16i8)src2);
+ vec5 = (v8u16)__msa_ilvl_b((v16i8)src6, (v16i8)src2);
+ vec6 = (v8u16)__msa_ilvr_b((v16i8)src7, (v16i8)src3);
+ vec7 = (v8u16)__msa_ilvl_b((v16i8)src7, (v16i8)src3);
+ vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0);
+ vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1);
+ vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2);
+ vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3);
+ vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4);
+ vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5);
+ vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6);
+ vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7);
+ reg0 = (v8i16)__msa_pckev_d((v2i64)vec1, (v2i64)vec0);
+ reg1 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec2);
+ reg2 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4);
+ reg3 = (v8i16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6);
+ reg0 += (v8i16)__msa_pckod_d((v2i64)vec1, (v2i64)vec0);
+ reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2);
+ reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4);
+ reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6);
+ reg0 = __msa_srai_h(reg0, 2);
+ reg1 = __msa_srai_h(reg1, 2);
+ reg2 = __msa_srai_h(reg2, 2);
+ reg3 = __msa_srai_h(reg3, 2);
+ vec4 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+ vec5 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+ vec6 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0);
+ vec7 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2);
+ vec0 = (v8u16)__msa_pckod_h((v8i16)vec5, (v8i16)vec4);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6);
+ vec2 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4);
+ vec3 = vec0 * const_0x70;
+ vec4 = vec1 * const_0x4A;
+ vec5 = vec2 * const_0x26;
+ vec2 *= const_0x70;
+ vec1 *= const_0x5E;
+ vec0 *= const_0x12;
+ reg0 = __msa_subv_h((v8i16)vec3, (v8i16)vec4);
+ reg1 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec5);
+ reg2 = __msa_subv_h((v8i16)vec2, (v8i16)vec1);
+ reg3 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec0);
+ reg0 += reg1;
+ reg2 += reg3;
+ reg0 = __msa_srai_h(reg0, 8);
+ reg2 = __msa_srai_h(reg2, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0);
+ res0 = __msa_copy_u_d((v2i64)dst0, 0);
+ res1 = __msa_copy_u_d((v2i64)dst0, 1);
+ SD(res0, dst_u);
+ SD(res1, dst_v);
+ t += 48;
+ s += 48;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void NV12ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ uint64_t val0, val1;
+ v16u8 src0, src1, res0, res1, dst0, dst1;
+ v8i16 vec0, vec1, vec2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v16u8 zero = {0};
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ val0 = LD(src_y);
+ val1 = LD(src_uv);
+ src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0);
+ src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1);
+ YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0);
+ res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ src_y += 8;
+ src_uv += 8;
+ dst_argb += 32;
+ }
+}
+
+void NV12ToRGB565Row_MSA(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ uint64_t val0, val1;
+ v16u8 src0, src1, dst0;
+ v8i16 vec0, vec1, vec2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v16u8 zero = {0};
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ val0 = LD(src_y);
+ val1 = LD(src_uv);
+ src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0);
+ src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1);
+ YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ vec0 = vec0 >> 3;
+ vec1 = (vec1 >> 2) << 5;
+ vec2 = (vec2 >> 3) << 11;
+ dst0 = (v16u8)(vec0 | vec1 | vec2);
+ ST_UB(dst0, dst_rgb565);
+ src_y += 8;
+ src_uv += 8;
+ dst_rgb565 += 16;
+ }
+}
+
+void NV21ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ uint64_t val0, val1;
+ v16u8 src0, src1, res0, res1, dst0, dst1;
+ v8i16 vec0, vec1, vec2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+ v16u8 zero = {0};
+ v16i8 shuffler = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ val0 = LD(src_y);
+ val1 = LD(src_vu);
+ src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0);
+ src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1);
+ src1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1);
+ YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0);
+ res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ src_y += 8;
+ src_vu += 8;
+ dst_argb += 32;
+ }
+}
+
+void SobelRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v16u8 src0, src1, vec0, dst0, dst1, dst2, dst3;
+ v16i8 mask0 = {0, 0, 0, 16, 1, 1, 1, 16, 2, 2, 2, 16, 3, 3, 3, 16};
+ v16i8 const_0x4 = __msa_ldi_b(0x4);
+ v16i8 mask1 = mask0 + const_0x4;
+ v16i8 mask2 = mask1 + const_0x4;
+ v16i8 mask3 = mask2 + const_0x4;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0);
+ vec0 = __msa_adds_u_b(src0, src1);
+ dst0 = (v16u8)__msa_vshf_b(mask0, (v16i8)alpha, (v16i8)vec0);
+ dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)alpha, (v16i8)vec0);
+ dst2 = (v16u8)__msa_vshf_b(mask2, (v16i8)alpha, (v16i8)vec0);
+ dst3 = (v16u8)__msa_vshf_b(mask3, (v16i8)alpha, (v16i8)vec0);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+ src_sobelx += 16;
+ src_sobely += 16;
+ dst_argb += 64;
+ }
+}
+
+void SobelToPlaneRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1;
+
+ for (x = 0; x < width; x += 32) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 16);
+ dst0 = __msa_adds_u_b(src0, src2);
+ dst1 = __msa_adds_u_b(src1, src3);
+ ST_UB2(dst0, dst1, dst_y, 16);
+ src_sobelx += 32;
+ src_sobely += 32;
+ dst_y += 32;
+ }
+}
+
+void SobelXYRow_MSA(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v16u8 src0, src1, vec0, vec1, vec2;
+ v16u8 reg0, reg1, dst0, dst1, dst2, dst3;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0);
+ vec0 = __msa_adds_u_b(src0, src1);
+ vec1 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src1);
+ vec2 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src1);
+ reg0 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)vec0);
+ reg1 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)vec0);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)vec1);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)vec1);
+ dst2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)vec2);
+ dst3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)vec2);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+ src_sobelx += 16;
+ src_sobely += 16;
+ dst_argb += 64;
+ }
+}
+
+void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0;
+ v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F);
+ v16u8 const_0x26 = (v16u8)__msa_fill_h(0x26);
+ v8u16 const_0x40 = (v8u16)__msa_fill_h(0x40);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48);
+ ARGBTOY(src0, src1, src2, src3, const_0x4B0F, const_0x26, const_0x40, 7,
+ dst0);
+ ST_UB(dst0, dst_y);
+ src_argb0 += 64;
+ dst_y += 16;
+ }
+}
+
+void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0;
+ v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200);
+ v16u8 const_0x1981 = (v16u8)__msa_fill_h(0x1981);
+ v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48);
+ ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8,
+ dst0);
+ ST_UB(dst0, dst_y);
+ src_argb0 += 64;
+ dst_y += 16;
+ }
+}
+
+void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0;
+ v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142);
+ v16u8 const_0x19 = (v16u8)__msa_fill_h(0x19);
+ v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48);
+ ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8,
+ dst0);
+ ST_UB(dst0, dst_y);
+ src_argb0 += 64;
+ dst_y += 16;
+ }
+}
+
+void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0;
+ v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900);
+ v16u8 const_0x4281 = (v16u8)__msa_fill_h(0x4281);
+ v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48);
+ ARGBTOY(src0, src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8,
+ dst0);
+ ST_UB(dst0, dst_y);
+ src_argb0 += 64;
+ dst_y += 16;
+ }
+}
+
+void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16u8 vec0, vec1, vec2, vec3;
+ v16u8 dst0, dst1;
+ v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
+ v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15,
+ 18, 19, 22, 23, 26, 27, 30, 31};
+ v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31};
+ v16i8 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30};
+ v16u8 const_0x7F = (v16u8)__msa_fill_h(0x7F);
+ v16u8 const_0x6B14 = (v16u8)__msa_fill_h(0x6B14);
+ v16u8 const_0x2B54 = (v16u8)__msa_fill_h(0x2B54);
+ v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
+
+ for (x = 0; x < width; x += 32) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)s, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)s, 48);
+ src4 = (v16u8)__msa_ld_b((const v16i8*)t, 0);
+ src5 = (v16u8)__msa_ld_b((const v16i8*)t, 16);
+ src6 = (v16u8)__msa_ld_b((const v16i8*)t, 32);
+ src7 = (v16u8)__msa_ld_b((const v16i8*)t, 48);
+ src0 = __msa_aver_u_b(src0, src4);
+ src1 = __msa_aver_u_b(src1, src5);
+ src2 = __msa_aver_u_b(src2, src6);
+ src3 = __msa_aver_u_b(src3, src7);
+ src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0);
+ src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2);
+ src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
+ src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2);
+ vec0 = __msa_aver_u_b(src4, src6);
+ vec1 = __msa_aver_u_b(src5, src7);
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 64);
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 80);
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 96);
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 112);
+ src4 = (v16u8)__msa_ld_b((v16i8*)t, 64);
+ src5 = (v16u8)__msa_ld_b((v16i8*)t, 80);
+ src6 = (v16u8)__msa_ld_b((v16i8*)t, 96);
+ src7 = (v16u8)__msa_ld_b((v16i8*)t, 112);
+ src0 = __msa_aver_u_b(src0, src4);
+ src1 = __msa_aver_u_b(src1, src5);
+ src2 = __msa_aver_u_b(src2, src6);
+ src3 = __msa_aver_u_b(src3, src7);
+ src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0);
+ src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2);
+ src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
+ src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2);
+ vec2 = __msa_aver_u_b(src4, src6);
+ vec3 = __msa_aver_u_b(src5, src7);
+ ARGBTOUV(vec0, vec1, vec2, vec3, const_0x6B14, const_0x7F, const_0x2B54,
+ const_0x8080, shuffler1, shuffler0, shuffler2, shuffler3, dst0,
+ dst1);
+ ST_UB(dst0, dst_v);
+ ST_UB(dst1, dst_u);
+ s += 128;
+ t += 128;
+ dst_v += 16;
+ dst_u += 16;
+ }
+}
+
+void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
+ v16u8 dst0, dst1, vec0, vec1, vec2, vec3;
+ v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
+ v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15,
+ 18, 19, 22, 23, 26, 27, 30, 31};
+ v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31};
+ v16i8 shuffler3 = {2, 1, 6, 5, 10, 9, 14, 13, 18, 17, 22, 21, 26, 25, 30, 29};
+ v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E);
+ v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000);
+ v16u8 const_0x264A = (v16u8)__msa_fill_h(0x264A);
+ v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
+
+ for (x = 0; x < width; x += 32) {
+ READ_ARGB(s, t, vec0, vec1, vec2, vec3);
+ ARGBTOUV(vec0, vec1, vec2, vec3, const_0x125E, const_0x7000, const_0x264A,
+ const_0x8080, shuffler0, shuffler1, shuffler2, shuffler3, dst0,
+ dst1);
+ ST_UB(dst0, dst_v);
+ ST_UB(dst1, dst_u);
+ s += 128;
+ t += 128;
+ dst_v += 16;
+ dst_u += 16;
+ }
+}
+
+void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
+ v16u8 src0, src1, src2, src3;
+ v16u8 dst0, dst1;
+ v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
+ v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15,
+ 18, 19, 22, 23, 26, 27, 30, 31};
+ v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31};
+ v16i8 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30};
+ v16u8 const_0x4A26 = (v16u8)__msa_fill_h(0x4A26);
+ v16u8 const_0x0070 = (v16u8)__msa_fill_h(0x0070);
+ v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E);
+ v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
+
+ for (x = 0; x < width; x += 32) {
+ READ_ARGB(s, t, src0, src1, src2, src3);
+ ARGBTOUV(src0, src1, src2, src3, const_0x4A26, const_0x0070, const_0x125E,
+ const_0x8080, shuffler1, shuffler0, shuffler2, shuffler3, dst0,
+ dst1);
+ ST_UB(dst0, dst_u);
+ ST_UB(dst1, dst_v);
+ s += 128;
+ t += 128;
+ dst_u += 16;
+ dst_v += 16;
+ }
+}
+
+void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ const uint8_t* s = src_rgb0;
+ const uint8_t* t = src_rgb0 + src_stride_rgb;
+ v16u8 dst0, dst1, vec0, vec1, vec2, vec3;
+ v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
+ v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15,
+ 18, 19, 22, 23, 26, 27, 30, 31};
+ v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31};
+ v16i8 shuffler3 = {2, 1, 6, 5, 10, 9, 14, 13, 18, 17, 22, 21, 26, 25, 30, 29};
+ v16u8 const_0x125E = (v16u8)__msa_fill_h(0x264A);
+ v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000);
+ v16u8 const_0x264A = (v16u8)__msa_fill_h(0x125E);
+ v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
+
+ for (x = 0; x < width; x += 32) {
+ READ_ARGB(s, t, vec0, vec1, vec2, vec3);
+ ARGBTOUV(vec0, vec1, vec2, vec3, const_0x125E, const_0x7000, const_0x264A,
+ const_0x8080, shuffler0, shuffler1, shuffler2, shuffler3, dst0,
+ dst1);
+ ST_UB(dst0, dst_u);
+ ST_UB(dst1, dst_v);
+ s += 128;
+ t += 128;
+ dst_u += 16;
+ dst_v += 16;
+ }
+}
+
+void I444ToARGBRow_MSA(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, dst0, dst1;
+ v8u16 vec0, vec1, vec2;
+ v4i32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+ v8i16 zero = {0};
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+
+ for (x = 0; x < width; x += 8) {
+ READI444(src_y, src_u, src_v, src0, src1, src2);
+ vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
+ reg0 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0);
+ reg1 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0);
+ reg0 *= vec_yg;
+ reg1 *= vec_yg;
+ reg0 = __msa_srai_w(reg0, 16);
+ reg1 = __msa_srai_w(reg1, 16);
+ reg4 = reg0 + vec_br;
+ reg5 = reg1 + vec_br;
+ reg2 = reg0 + vec_bg;
+ reg3 = reg1 + vec_bg;
+ reg0 += vec_bb;
+ reg1 += vec_bb;
+ vec0 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1);
+ vec1 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src2);
+ reg6 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0);
+ reg7 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0);
+ reg8 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec1);
+ reg9 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec1);
+ reg0 -= reg6 * vec_ub;
+ reg1 -= reg7 * vec_ub;
+ reg2 -= reg6 * vec_ug;
+ reg3 -= reg7 * vec_ug;
+ reg4 -= reg8 * vec_vr;
+ reg5 -= reg9 * vec_vr;
+ reg2 -= reg8 * vec_vg;
+ reg3 -= reg9 * vec_vg;
+ reg0 = __msa_srai_w(reg0, 6);
+ reg1 = __msa_srai_w(reg1, 6);
+ reg2 = __msa_srai_w(reg2, 6);
+ reg3 = __msa_srai_w(reg3, 6);
+ reg4 = __msa_srai_w(reg4, 6);
+ reg5 = __msa_srai_w(reg5, 6);
+ CLIP_0TO255(reg0, reg1, reg2, reg3, reg4, reg5);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+ vec2 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4);
+ vec0 = (v8u16)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);
+ vec1 = (v8u16)__msa_ilvev_b((v16i8)alpha, (v16i8)vec2);
+ dst0 = (v16u8)__msa_ilvr_h((v8i16)vec1, (v8i16)vec0);
+ dst1 = (v16u8)__msa_ilvl_h((v8i16)vec1, (v8i16)vec0);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ src_y += 8;
+ src_u += 8;
+ src_v += 8;
+ dst_argb += 32;
+ }
+}
+
+void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ int x;
+ v16u8 src0, res0, res1, res2, res3, res4, dst0, dst1, dst2, dst3;
+ v8i16 vec0, vec1;
+ v4i32 reg0, reg1, reg2, reg3;
+ v4i32 vec_yg = __msa_fill_w(0x4A35);
+ v8i16 vec_ygb = __msa_fill_h(0xFB78);
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+ v8i16 max = __msa_ldi_h(0xFF);
+ v8i16 zero = {0};
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_y, 0);
+ vec0 = (v8i16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
+ vec1 = (v8i16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
+ reg0 = (v4i32)__msa_ilvr_h(zero, vec0);
+ reg1 = (v4i32)__msa_ilvl_h(zero, vec0);
+ reg2 = (v4i32)__msa_ilvr_h(zero, vec1);
+ reg3 = (v4i32)__msa_ilvl_h(zero, vec1);
+ reg0 *= vec_yg;
+ reg1 *= vec_yg;
+ reg2 *= vec_yg;
+ reg3 *= vec_yg;
+ reg0 = __msa_srai_w(reg0, 16);
+ reg1 = __msa_srai_w(reg1, 16);
+ reg2 = __msa_srai_w(reg2, 16);
+ reg3 = __msa_srai_w(reg3, 16);
+ vec0 = (v8i16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+ vec1 = (v8i16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+ vec0 += vec_ygb;
+ vec1 += vec_ygb;
+ vec0 = __msa_srai_h(vec0, 6);
+ vec1 = __msa_srai_h(vec1, 6);
+ vec0 = __msa_maxi_s_h(vec0, 0);
+ vec1 = __msa_maxi_s_h(vec1, 0);
+ vec0 = __msa_min_s_h(max, vec0);
+ vec1 = __msa_min_s_h(max, vec1);
+ res0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ res1 = (v16u8)__msa_ilvr_b((v16i8)res0, (v16i8)res0);
+ res2 = (v16u8)__msa_ilvl_b((v16i8)res0, (v16i8)res0);
+ res3 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)res0);
+ res4 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)res0);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res1);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res1);
+ dst2 = (v16u8)__msa_ilvr_b((v16i8)res4, (v16i8)res2);
+ dst3 = (v16u8)__msa_ilvl_b((v16i8)res4, (v16i8)res2);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+ src_y += 16;
+ dst_argb += 64;
+ }
+}
+
+void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ int x;
+ v16u8 src0, vec0, vec1, vec2, vec3, dst0, dst1, dst2, dst3;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_y, 0);
+ vec0 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
+ vec1 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
+ vec2 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)src0);
+ vec3 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)src0);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0);
+ dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1);
+ dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+ src_y += 16;
+ dst_argb += 64;
+ }
+}
+
+void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2;
+ v8i16 vec0, vec1, vec2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_yuy2, 0);
+ src1 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0);
+ src2 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0);
+ YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ STOREARGB(vec0, vec1, vec2, alpha, dst_argb);
+ src_yuy2 += 16;
+ dst_argb += 32;
+ }
+}
+
+void UYVYToARGBRow_MSA(const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2;
+ v8i16 vec0, vec1, vec2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v4i32 vec_ubvr, vec_ugvg;
+ v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+
+ YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+ vec_br, vec_yg);
+ vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+ vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_uyvy, 0);
+ src1 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0);
+ src2 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0);
+ YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+ vec0, vec1, vec2);
+ STOREARGB(vec0, vec1, vec2, alpha, dst_argb);
+ src_uyvy += 16;
+ dst_argb += 32;
+ }
+}
+
+void InterpolateRow_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int width,
+ int32_t source_y_fraction) {
+ int32_t y1_fraction = source_y_fraction;
+ int32_t y0_fraction = 256 - y1_fraction;
+ uint16_t y_fractions;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1;
+ v8u16 vec0, vec1, vec2, vec3, y_frac;
+
+ if (0 == y1_fraction) {
+ memcpy(dst_ptr, src_ptr, width);
+ return;
+ }
+
+ if (128 == y1_fraction) {
+ for (x = 0; x < width; x += 32) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)t, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)t, 16);
+ dst0 = __msa_aver_u_b(src0, src2);
+ dst1 = __msa_aver_u_b(src1, src3);
+ ST_UB2(dst0, dst1, dst_ptr, 16);
+ s += 32;
+ t += 32;
+ dst_ptr += 32;
+ }
+ return;
+ }
+
+ y_fractions = (uint16_t)(y0_fraction + (y1_fraction << 8));
+ y_frac = (v8u16)__msa_fill_h(y_fractions);
+
+ for (x = 0; x < width; x += 32) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)t, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)t, 16);
+ vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
+ vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
+ vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
+ vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
+ vec0 = (v8u16)__msa_dotp_u_h((v16u8)vec0, (v16u8)y_frac);
+ vec1 = (v8u16)__msa_dotp_u_h((v16u8)vec1, (v16u8)y_frac);
+ vec2 = (v8u16)__msa_dotp_u_h((v16u8)vec2, (v16u8)y_frac);
+ vec3 = (v8u16)__msa_dotp_u_h((v16u8)vec3, (v16u8)y_frac);
+ vec0 = (v8u16)__msa_srari_h((v8i16)vec0, 8);
+ vec1 = (v8u16)__msa_srari_h((v8i16)vec1, 8);
+ vec2 = (v8u16)__msa_srari_h((v8i16)vec2, 8);
+ vec3 = (v8u16)__msa_srari_h((v8i16)vec3, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+ ST_UB2(dst0, dst1, dst_ptr, 16);
+ s += 32;
+ t += 32;
+ dst_ptr += 32;
+ }
+}
+
+void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width) {
+ int x;
+ v4i32 dst0 = __builtin_msa_fill_w(v32);
+
+ for (x = 0; x < width; x += 4) {
+ ST_UB(dst0, dst_argb);
+ dst_argb += 16;
+ }
+}
+
+void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2;
+ v16i8 shuffler0 = {2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17};
+ v16i8 shuffler1 = {8, 7, 12, 11, 10, 15, 14, 13,
+ 18, 17, 16, 21, 20, 19, 24, 23};
+ v16i8 shuffler2 = {14, 19, 18, 17, 22, 21, 20, 25,
+ 24, 23, 28, 27, 26, 31, 30, 29};
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 32);
+ src3 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 8);
+ src4 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8);
+ dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src4, (v16i8)src3);
+ dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src2, (v16i8)src1);
+ ST_UB2(dst0, dst1, dst_rgb24, 16);
+ ST_UB(dst2, (dst_rgb24 + 32));
+ src_raw += 48;
+ dst_rgb24 += 48;
+ }
+}
+
+void MergeUVRow_MSA(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ int x;
+ v16u8 src0, src1, dst0, dst1;
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_u, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_v, 0);
+ dst0 = (v16u8)__msa_ilvr_b((v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_ilvl_b((v16i8)src1, (v16i8)src0);
+ ST_UB2(dst0, dst1, dst_uv, 16);
+ src_u += 16;
+ src_v += 16;
+ dst_uv += 32;
+ }
+}
+
+void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
+ int i;
+ v16u8 src0, src1, src2, src3, vec0, vec1, dst0;
+
+ for (i = 0; i < width; i += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
+ vec0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ vec1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_a);
+ src_argb += 64;
+ dst_a += 16;
+ }
+}
+
+void ARGBBlendRow_MSA(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 vec8, vec9, vec10, vec11, vec12, vec13;
+ v8u16 const_256 = (v8u16)__msa_ldi_h(256);
+ v16u8 const_255 = (v16u8)__msa_ldi_b(255);
+ v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255};
+ v16i8 zero = {0};
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16);
+ vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0);
+ vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0);
+ vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1);
+ vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1);
+ vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2);
+ vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2);
+ vec6 = (v8u16)__msa_ilvr_b(zero, (v16i8)src3);
+ vec7 = (v8u16)__msa_ilvl_b(zero, (v16i8)src3);
+ vec8 = (v8u16)__msa_fill_h(vec0[3]);
+ vec9 = (v8u16)__msa_fill_h(vec0[7]);
+ vec10 = (v8u16)__msa_fill_h(vec1[3]);
+ vec11 = (v8u16)__msa_fill_h(vec1[7]);
+ vec8 = (v8u16)__msa_pckev_d((v2i64)vec9, (v2i64)vec8);
+ vec9 = (v8u16)__msa_pckev_d((v2i64)vec11, (v2i64)vec10);
+ vec10 = (v8u16)__msa_fill_h(vec2[3]);
+ vec11 = (v8u16)__msa_fill_h(vec2[7]);
+ vec12 = (v8u16)__msa_fill_h(vec3[3]);
+ vec13 = (v8u16)__msa_fill_h(vec3[7]);
+ vec10 = (v8u16)__msa_pckev_d((v2i64)vec11, (v2i64)vec10);
+ vec11 = (v8u16)__msa_pckev_d((v2i64)vec13, (v2i64)vec12);
+ vec8 = const_256 - vec8;
+ vec9 = const_256 - vec9;
+ vec10 = const_256 - vec10;
+ vec11 = const_256 - vec11;
+ vec8 *= vec4;
+ vec9 *= vec5;
+ vec10 *= vec6;
+ vec11 *= vec7;
+ vec8 = (v8u16)__msa_srai_h((v8i16)vec8, 8);
+ vec9 = (v8u16)__msa_srai_h((v8i16)vec9, 8);
+ vec10 = (v8u16)__msa_srai_h((v8i16)vec10, 8);
+ vec11 = (v8u16)__msa_srai_h((v8i16)vec11, 8);
+ vec0 += vec8;
+ vec1 += vec9;
+ vec2 += vec10;
+ vec3 += vec11;
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+ dst0 = __msa_bmnz_v(dst0, const_255, mask);
+ dst1 = __msa_bmnz_v(dst1, const_255, mask);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ src_argb0 += 32;
+ src_argb1 += 32;
+ dst_argb += 32;
+ }
+}
+
+void ARGBQuantizeRow_MSA(uint8_t* dst_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ v4i32 tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+ v4i32 vec_scale = __msa_fill_w(scale);
+ v16u8 vec_int_sz = (v16u8)__msa_fill_b(interval_size);
+ v16u8 vec_int_ofst = (v16u8)__msa_fill_b(interval_offset);
+ v16i8 mask = {0, 1, 2, 19, 4, 5, 6, 23, 8, 9, 10, 27, 12, 13, 14, 31};
+ v16i8 zero = {0};
+
+ for (x = 0; x < width; x += 8) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 32);
+ src3 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 48);
+ vec0 = (v8i16)__msa_ilvr_b(zero, (v16i8)src0);
+ vec1 = (v8i16)__msa_ilvl_b(zero, (v16i8)src0);
+ vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1);
+ vec3 = (v8i16)__msa_ilvl_b(zero, (v16i8)src1);
+ vec4 = (v8i16)__msa_ilvr_b(zero, (v16i8)src2);
+ vec5 = (v8i16)__msa_ilvl_b(zero, (v16i8)src2);
+ vec6 = (v8i16)__msa_ilvr_b(zero, (v16i8)src3);
+ vec7 = (v8i16)__msa_ilvl_b(zero, (v16i8)src3);
+ tmp0 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0);
+ tmp1 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0);
+ tmp2 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec1);
+ tmp3 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec1);
+ tmp4 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec2);
+ tmp5 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec2);
+ tmp6 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec3);
+ tmp7 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec3);
+ tmp8 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec4);
+ tmp9 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec4);
+ tmp10 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec5);
+ tmp11 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec5);
+ tmp12 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec6);
+ tmp13 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec6);
+ tmp14 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec7);
+ tmp15 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec7);
+ tmp0 *= vec_scale;
+ tmp1 *= vec_scale;
+ tmp2 *= vec_scale;
+ tmp3 *= vec_scale;
+ tmp4 *= vec_scale;
+ tmp5 *= vec_scale;
+ tmp6 *= vec_scale;
+ tmp7 *= vec_scale;
+ tmp8 *= vec_scale;
+ tmp9 *= vec_scale;
+ tmp10 *= vec_scale;
+ tmp11 *= vec_scale;
+ tmp12 *= vec_scale;
+ tmp13 *= vec_scale;
+ tmp14 *= vec_scale;
+ tmp15 *= vec_scale;
+ tmp0 >>= 16;
+ tmp1 >>= 16;
+ tmp2 >>= 16;
+ tmp3 >>= 16;
+ tmp4 >>= 16;
+ tmp5 >>= 16;
+ tmp6 >>= 16;
+ tmp7 >>= 16;
+ tmp8 >>= 16;
+ tmp9 >>= 16;
+ tmp10 >>= 16;
+ tmp11 >>= 16;
+ tmp12 >>= 16;
+ tmp13 >>= 16;
+ tmp14 >>= 16;
+ tmp15 >>= 16;
+ vec0 = (v8i16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
+ vec1 = (v8i16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2);
+ vec2 = (v8i16)__msa_pckev_h((v8i16)tmp5, (v8i16)tmp4);
+ vec3 = (v8i16)__msa_pckev_h((v8i16)tmp7, (v8i16)tmp6);
+ vec4 = (v8i16)__msa_pckev_h((v8i16)tmp9, (v8i16)tmp8);
+ vec5 = (v8i16)__msa_pckev_h((v8i16)tmp11, (v8i16)tmp10);
+ vec6 = (v8i16)__msa_pckev_h((v8i16)tmp13, (v8i16)tmp12);
+ vec7 = (v8i16)__msa_pckev_h((v8i16)tmp15, (v8i16)tmp14);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+ dst2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
+ dst3 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
+ dst0 *= vec_int_sz;
+ dst1 *= vec_int_sz;
+ dst2 *= vec_int_sz;
+ dst3 *= vec_int_sz;
+ dst0 += vec_int_ofst;
+ dst1 += vec_int_ofst;
+ dst2 += vec_int_ofst;
+ dst3 += vec_int_ofst;
+ dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)src0, (v16i8)dst0);
+ dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)src1, (v16i8)dst1);
+ dst2 = (v16u8)__msa_vshf_b(mask, (v16i8)src2, (v16i8)dst2);
+ dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)src3, (v16i8)dst3);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+ dst_argb += 64;
+ }
+}
+
+void ARGBColorMatrixRow_MSA(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
+ int width) {
+ int32_t x;
+ v16i8 src0;
+ v16u8 src1, src2, dst0, dst1;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+ v8i16 vec10, vec11, vec12, vec13, vec14, vec15, vec16, vec17;
+ v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ v4i32 tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+ v16i8 zero = {0};
+ v8i16 max = __msa_ldi_h(255);
+
+ src0 = __msa_ld_b((v16i8*)matrix_argb, 0);
+ vec0 = (v8i16)__msa_ilvr_b(zero, src0);
+ vec1 = (v8i16)__msa_ilvl_b(zero, src0);
+
+ for (x = 0; x < width; x += 8) {
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1);
+ vec3 = (v8i16)__msa_ilvl_b(zero, (v16i8)src1);
+ vec4 = (v8i16)__msa_ilvr_b(zero, (v16i8)src2);
+ vec5 = (v8i16)__msa_ilvl_b(zero, (v16i8)src2);
+ vec6 = (v8i16)__msa_pckod_d((v2i64)vec2, (v2i64)vec2);
+ vec7 = (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec3);
+ vec8 = (v8i16)__msa_pckod_d((v2i64)vec4, (v2i64)vec4);
+ vec9 = (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec5);
+ vec2 = (v8i16)__msa_pckev_d((v2i64)vec2, (v2i64)vec2);
+ vec3 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec3);
+ vec4 = (v8i16)__msa_pckev_d((v2i64)vec4, (v2i64)vec4);
+ vec5 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec5);
+ vec10 = vec2 * vec0;
+ vec11 = vec2 * vec1;
+ vec12 = vec6 * vec0;
+ vec13 = vec6 * vec1;
+ tmp0 = __msa_hadd_s_w(vec10, vec10);
+ tmp1 = __msa_hadd_s_w(vec11, vec11);
+ tmp2 = __msa_hadd_s_w(vec12, vec12);
+ tmp3 = __msa_hadd_s_w(vec13, vec13);
+ vec14 = vec3 * vec0;
+ vec15 = vec3 * vec1;
+ vec16 = vec7 * vec0;
+ vec17 = vec7 * vec1;
+ tmp4 = __msa_hadd_s_w(vec14, vec14);
+ tmp5 = __msa_hadd_s_w(vec15, vec15);
+ tmp6 = __msa_hadd_s_w(vec16, vec16);
+ tmp7 = __msa_hadd_s_w(vec17, vec17);
+ vec10 = __msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
+ vec11 = __msa_pckev_h((v8i16)tmp3, (v8i16)tmp2);
+ vec12 = __msa_pckev_h((v8i16)tmp5, (v8i16)tmp4);
+ vec13 = __msa_pckev_h((v8i16)tmp7, (v8i16)tmp6);
+ tmp0 = __msa_hadd_s_w(vec10, vec10);
+ tmp1 = __msa_hadd_s_w(vec11, vec11);
+ tmp2 = __msa_hadd_s_w(vec12, vec12);
+ tmp3 = __msa_hadd_s_w(vec13, vec13);
+ tmp0 = __msa_srai_w(tmp0, 6);
+ tmp1 = __msa_srai_w(tmp1, 6);
+ tmp2 = __msa_srai_w(tmp2, 6);
+ tmp3 = __msa_srai_w(tmp3, 6);
+ vec2 = vec4 * vec0;
+ vec6 = vec4 * vec1;
+ vec3 = vec8 * vec0;
+ vec7 = vec8 * vec1;
+ tmp8 = __msa_hadd_s_w(vec2, vec2);
+ tmp9 = __msa_hadd_s_w(vec6, vec6);
+ tmp10 = __msa_hadd_s_w(vec3, vec3);
+ tmp11 = __msa_hadd_s_w(vec7, vec7);
+ vec4 = vec5 * vec0;
+ vec8 = vec5 * vec1;
+ vec5 = vec9 * vec0;
+ vec9 = vec9 * vec1;
+ tmp12 = __msa_hadd_s_w(vec4, vec4);
+ tmp13 = __msa_hadd_s_w(vec8, vec8);
+ tmp14 = __msa_hadd_s_w(vec5, vec5);
+ tmp15 = __msa_hadd_s_w(vec9, vec9);
+ vec14 = __msa_pckev_h((v8i16)tmp9, (v8i16)tmp8);
+ vec15 = __msa_pckev_h((v8i16)tmp11, (v8i16)tmp10);
+ vec16 = __msa_pckev_h((v8i16)tmp13, (v8i16)tmp12);
+ vec17 = __msa_pckev_h((v8i16)tmp15, (v8i16)tmp14);
+ tmp4 = __msa_hadd_s_w(vec14, vec14);
+ tmp5 = __msa_hadd_s_w(vec15, vec15);
+ tmp6 = __msa_hadd_s_w(vec16, vec16);
+ tmp7 = __msa_hadd_s_w(vec17, vec17);
+ tmp4 = __msa_srai_w(tmp4, 6);
+ tmp5 = __msa_srai_w(tmp5, 6);
+ tmp6 = __msa_srai_w(tmp6, 6);
+ tmp7 = __msa_srai_w(tmp7, 6);
+ vec10 = __msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
+ vec11 = __msa_pckev_h((v8i16)tmp3, (v8i16)tmp2);
+ vec12 = __msa_pckev_h((v8i16)tmp5, (v8i16)tmp4);
+ vec13 = __msa_pckev_h((v8i16)tmp7, (v8i16)tmp6);
+ vec10 = __msa_maxi_s_h(vec10, 0);
+ vec11 = __msa_maxi_s_h(vec11, 0);
+ vec12 = __msa_maxi_s_h(vec12, 0);
+ vec13 = __msa_maxi_s_h(vec13, 0);
+ vec10 = __msa_min_s_h(vec10, max);
+ vec11 = __msa_min_s_h(vec11, max);
+ vec12 = __msa_min_s_h(vec12, max);
+ vec13 = __msa_min_s_h(vec13, max);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec11, (v16i8)vec10);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)vec13, (v16i8)vec12);
+ ST_UB2(dst0, dst1, dst_argb, 16);
+ src_argb += 32;
+ dst_argb += 32;
+ }
+}
+
+void SplitUVRow_MSA(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
+
+ for (x = 0; x < width; x += 32) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 48);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+ dst2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ dst3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ ST_UB2(dst0, dst1, dst_u, 16);
+ ST_UB2(dst2, dst3, dst_v, 16);
+ src_uv += 64;
+ dst_u += 32;
+ dst_v += 32;
+ }
+}
+
+void SetRow_MSA(uint8_t* dst, uint8_t v8, int width) {
+ int x;
+ v16u8 dst0 = (v16u8)__msa_fill_b(v8);
+
+ for (x = 0; x < width; x += 16) {
+ ST_UB(dst0, dst);
+ dst += 16;
+ }
+}
+
+void MirrorUVRow_MSA(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ v16u8 src0, src1, src2, src3;
+ v16u8 dst0, dst1, dst2, dst3;
+ v16i8 mask0 = {30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0};
+ v16i8 mask1 = {31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1};
+
+ src_uv += (2 * width);
+
+ for (x = 0; x < width; x += 32) {
+ src_uv -= 64;
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 32);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 48);
+ dst0 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2);
+ dst2 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0);
+ dst3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src3, (v16i8)src2);
+ ST_UB2(dst0, dst1, dst_v, 16);
+ ST_UB2(dst2, dst3, dst_u, 16);
+ dst_u += 32;
+ dst_v += 32;
+ }
+}
+
+void SobelXRow_MSA(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int32_t width) {
+ int x;
+ v16u8 src0, src1, src2, src3, src4, src5, dst0;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
+ v16i8 mask0 = {0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9};
+ v16i8 tmp = __msa_ldi_b(8);
+ v16i8 mask1 = mask0 + tmp;
+ v8i16 zero = {0};
+ v8i16 max = __msa_ldi_h(255);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 16);
+ src4 = (v16u8)__msa_ld_b((const v16i8*)src_y2, 0);
+ src5 = (v16u8)__msa_ld_b((const v16i8*)src_y2, 16);
+ vec0 = (v8i16)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0);
+ vec1 = (v8i16)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
+ vec2 = (v8i16)__msa_vshf_b(mask0, (v16i8)src3, (v16i8)src2);
+ vec3 = (v8i16)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2);
+ vec4 = (v8i16)__msa_vshf_b(mask0, (v16i8)src5, (v16i8)src4);
+ vec5 = (v8i16)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4);
+ vec0 = (v8i16)__msa_hsub_u_h((v16u8)vec0, (v16u8)vec0);
+ vec1 = (v8i16)__msa_hsub_u_h((v16u8)vec1, (v16u8)vec1);
+ vec2 = (v8i16)__msa_hsub_u_h((v16u8)vec2, (v16u8)vec2);
+ vec3 = (v8i16)__msa_hsub_u_h((v16u8)vec3, (v16u8)vec3);
+ vec4 = (v8i16)__msa_hsub_u_h((v16u8)vec4, (v16u8)vec4);
+ vec5 = (v8i16)__msa_hsub_u_h((v16u8)vec5, (v16u8)vec5);
+ vec0 += vec2;
+ vec1 += vec3;
+ vec4 += vec2;
+ vec5 += vec3;
+ vec0 += vec4;
+ vec1 += vec5;
+ vec0 = __msa_add_a_h(zero, vec0);
+ vec1 = __msa_add_a_h(zero, vec1);
+ vec0 = __msa_maxi_s_h(vec0, 0);
+ vec1 = __msa_maxi_s_h(vec1, 0);
+ vec0 = __msa_min_s_h(max, vec0);
+ vec1 = __msa_min_s_h(max, vec1);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_sobelx);
+ src_y0 += 16;
+ src_y1 += 16;
+ src_y2 += 16;
+ dst_sobelx += 16;
+ }
+}
+
+void SobelYRow_MSA(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int32_t width) {
+ int x;
+ v16u8 src0, src1, dst0;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6;
+ v8i16 zero = {0};
+ v8i16 max = __msa_ldi_h(255);
+
+ for (x = 0; x < width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 0);
+ vec0 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src0);
+ vec1 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)src0);
+ vec2 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src1);
+ vec3 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)src1);
+ vec0 -= vec2;
+ vec1 -= vec3;
+ vec6[0] = src_y0[16] - src_y1[16];
+ vec6[1] = src_y0[17] - src_y1[17];
+ vec2 = (v8i16)__msa_sldi_b((v16i8)vec1, (v16i8)vec0, 2);
+ vec3 = (v8i16)__msa_sldi_b((v16i8)vec6, (v16i8)vec1, 2);
+ vec4 = (v8i16)__msa_sldi_b((v16i8)vec1, (v16i8)vec0, 4);
+ vec5 = (v8i16)__msa_sldi_b((v16i8)vec6, (v16i8)vec1, 4);
+ vec0 += vec2;
+ vec1 += vec3;
+ vec4 += vec2;
+ vec5 += vec3;
+ vec0 += vec4;
+ vec1 += vec5;
+ vec0 = __msa_add_a_h(zero, vec0);
+ vec1 = __msa_add_a_h(zero, vec1);
+ vec0 = __msa_maxi_s_h(vec0, 0);
+ vec1 = __msa_maxi_s_h(vec1, 0);
+ vec0 = __msa_min_s_h(max, vec0);
+ vec1 = __msa_min_s_h(max, vec1);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst_sobely);
+ src_y0 += 16;
+ src_y1 += 16;
+ dst_sobely += 16;
+ }
+}
+
+void HalfFloatRow_MSA(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ int i;
+ v8u16 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
+ v4u32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v4f32 fvec0, fvec1, fvec2, fvec3, fvec4, fvec5, fvec6, fvec7;
+ v4f32 mult_vec;
+ v8i16 zero = {0};
+ mult_vec[0] = 1.9259299444e-34f * scale;
+ mult_vec = (v4f32)__msa_splati_w((v4i32)mult_vec, 0);
+
+ for (i = 0; i < width; i += 32) {
+ src0 = (v8u16)__msa_ld_h((v8i16*)src, 0);
+ src1 = (v8u16)__msa_ld_h((v8i16*)src, 16);
+ src2 = (v8u16)__msa_ld_h((v8i16*)src, 32);
+ src3 = (v8u16)__msa_ld_h((v8i16*)src, 48);
+ vec0 = (v4u32)__msa_ilvr_h(zero, (v8i16)src0);
+ vec1 = (v4u32)__msa_ilvl_h(zero, (v8i16)src0);
+ vec2 = (v4u32)__msa_ilvr_h(zero, (v8i16)src1);
+ vec3 = (v4u32)__msa_ilvl_h(zero, (v8i16)src1);
+ vec4 = (v4u32)__msa_ilvr_h(zero, (v8i16)src2);
+ vec5 = (v4u32)__msa_ilvl_h(zero, (v8i16)src2);
+ vec6 = (v4u32)__msa_ilvr_h(zero, (v8i16)src3);
+ vec7 = (v4u32)__msa_ilvl_h(zero, (v8i16)src3);
+ fvec0 = __msa_ffint_u_w(vec0);
+ fvec1 = __msa_ffint_u_w(vec1);
+ fvec2 = __msa_ffint_u_w(vec2);
+ fvec3 = __msa_ffint_u_w(vec3);
+ fvec4 = __msa_ffint_u_w(vec4);
+ fvec5 = __msa_ffint_u_w(vec5);
+ fvec6 = __msa_ffint_u_w(vec6);
+ fvec7 = __msa_ffint_u_w(vec7);
+ fvec0 *= mult_vec;
+ fvec1 *= mult_vec;
+ fvec2 *= mult_vec;
+ fvec3 *= mult_vec;
+ fvec4 *= mult_vec;
+ fvec5 *= mult_vec;
+ fvec6 *= mult_vec;
+ fvec7 *= mult_vec;
+ vec0 = ((v4u32)fvec0) >> 13;
+ vec1 = ((v4u32)fvec1) >> 13;
+ vec2 = ((v4u32)fvec2) >> 13;
+ vec3 = ((v4u32)fvec3) >> 13;
+ vec4 = ((v4u32)fvec4) >> 13;
+ vec5 = ((v4u32)fvec5) >> 13;
+ vec6 = ((v4u32)fvec6) >> 13;
+ vec7 = ((v4u32)fvec7) >> 13;
+ dst0 = (v8u16)__msa_pckev_h((v8i16)vec1, (v8i16)vec0);
+ dst1 = (v8u16)__msa_pckev_h((v8i16)vec3, (v8i16)vec2);
+ dst2 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4);
+ dst3 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6);
+ ST_UH2(dst0, dst1, dst, 8);
+ ST_UH2(dst2, dst3, dst + 16, 8);
+ src += 32;
+ dst += 32;
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_neon.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_neon.cc
new file mode 100644
index 0000000000..ff87e74c62
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/row_neon.cc
@@ -0,0 +1,2693 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#include <stdio.h>
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC Neon
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
+ !defined(__aarch64__)
+
+// Read 8 Y, 4 U and 4 V from 422
+#define READYUV422 \
+ "vld1.8 {d0}, [%0]! \n" \
+ "vld1.32 {d2[0]}, [%1]! \n" \
+ "vld1.32 {d2[1]}, [%2]! \n"
+
+// Read 8 Y, 8 U and 8 V from 444
+#define READYUV444 \
+ "vld1.8 {d0}, [%0]! \n" \
+ "vld1.8 {d2}, [%1]! \n" \
+ "vld1.8 {d3}, [%2]! \n" \
+ "vpaddl.u8 q1, q1 \n" \
+ "vrshrn.u16 d2, q1, #1 \n"
+
+// Read 8 Y, and set 4 U and 4 V to 128
+#define READYUV400 \
+ "vld1.8 {d0}, [%0]! \n" \
+ "vmov.u8 d2, #128 \n"
+
+// Read 8 Y and 4 UV from NV12
+#define READNV12 \
+ "vld1.8 {d0}, [%0]! \n" \
+ "vld1.8 {d2}, [%1]! \n" \
+ "vmov.u8 d3, d2 \n" /* split odd/even uv apart */ \
+ "vuzp.u8 d2, d3 \n" \
+ "vtrn.u32 d2, d3 \n"
+
+// Read 8 Y and 4 VU from NV21
+#define READNV21 \
+ "vld1.8 {d0}, [%0]! \n" \
+ "vld1.8 {d2}, [%1]! \n" \
+ "vmov.u8 d3, d2 \n" /* split odd/even uv apart */ \
+ "vuzp.u8 d3, d2 \n" \
+ "vtrn.u32 d2, d3 \n"
+
+// Read 8 YUY2
+#define READYUY2 \
+ "vld2.8 {d0, d2}, [%0]! \n" \
+ "vmov.u8 d3, d2 \n" \
+ "vuzp.u8 d2, d3 \n" \
+ "vtrn.u32 d2, d3 \n"
+
+// Read 8 UYVY
+#define READUYVY \
+ "vld2.8 {d2, d3}, [%0]! \n" \
+ "vmov.u8 d0, d3 \n" \
+ "vmov.u8 d3, d2 \n" \
+ "vuzp.u8 d2, d3 \n" \
+ "vtrn.u32 d2, d3 \n"
+
+#define YUVTORGB_SETUP \
+ "vld1.8 {d24}, [%[kUVToRB]] \n" \
+ "vld1.8 {d25}, [%[kUVToG]] \n" \
+ "vld1.16 {d26[], d27[]}, [%[kUVBiasBGR]]! \n" \
+ "vld1.16 {d8[], d9[]}, [%[kUVBiasBGR]]! \n" \
+ "vld1.16 {d28[], d29[]}, [%[kUVBiasBGR]] \n" \
+ "vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n"
+
+#define YUVTORGB \
+ "vmull.u8 q8, d2, d24 \n" /* u/v B/R component */ \
+ "vmull.u8 q9, d2, d25 \n" /* u/v G component */ \
+ "vmovl.u8 q0, d0 \n" /* Y */ \
+ "vmovl.s16 q10, d1 \n" \
+ "vmovl.s16 q0, d0 \n" \
+ "vmul.s32 q10, q10, q15 \n" \
+ "vmul.s32 q0, q0, q15 \n" \
+ "vqshrun.s32 d0, q0, #16 \n" \
+ "vqshrun.s32 d1, q10, #16 \n" /* Y */ \
+ "vadd.s16 d18, d19 \n" \
+ "vshll.u16 q1, d16, #16 \n" /* Replicate u * UB */ \
+ "vshll.u16 q10, d17, #16 \n" /* Replicate v * VR */ \
+ "vshll.u16 q3, d18, #16 \n" /* Replicate (v*VG + u*UG)*/ \
+ "vaddw.u16 q1, q1, d16 \n" \
+ "vaddw.u16 q10, q10, d17 \n" \
+ "vaddw.u16 q3, q3, d18 \n" \
+ "vqadd.s16 q8, q0, q13 \n" /* B */ \
+ "vqadd.s16 q9, q0, q14 \n" /* R */ \
+ "vqadd.s16 q0, q0, q4 \n" /* G */ \
+ "vqadd.s16 q8, q8, q1 \n" /* B */ \
+ "vqadd.s16 q9, q9, q10 \n" /* R */ \
+ "vqsub.s16 q0, q0, q3 \n" /* G */ \
+ "vqshrun.s16 d20, q8, #6 \n" /* B */ \
+ "vqshrun.s16 d22, q9, #6 \n" /* R */ \
+ "vqshrun.s16 d21, q0, #6 \n" /* G */
+
+void I444ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
+ "1: \n" READYUV444 YUVTORGB
+ "subs %4, %4, #8 \n"
+ "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_argb), // %3
+ "+r"(width) // %4
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+void I422ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
+ "1: \n" READYUV422 YUVTORGB
+ "subs %4, %4, #8 \n"
+ "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_argb), // %3
+ "+r"(width) // %4
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READYUV422 YUVTORGB
+ "subs %5, %5, #8 \n"
+ "vld1.8 {d23}, [%3]! \n"
+ "vst4.8 {d20, d21, d22, d23}, [%4]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(src_a), // %3
+ "+r"(dst_argb), // %4
+ "+r"(width) // %5
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+void I422ToRGBARow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READYUV422 YUVTORGB
+ "subs %4, %4, #8 \n"
+ "vmov.u8 d19, #255 \n" // YUVTORGB modified d19
+ "vst4.8 {d19, d20, d21, d22}, [%3]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_rgba), // %3
+ "+r"(width) // %4
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+void I422ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READYUV422 YUVTORGB
+ "subs %4, %4, #8 \n"
+ "vst3.8 {d20, d21, d22}, [%3]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_rgb24), // %3
+ "+r"(width) // %4
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+#define ARGBTORGB565 \
+ "vshll.u8 q0, d22, #8 \n" /* R */ \
+ "vshll.u8 q8, d21, #8 \n" /* G */ \
+ "vshll.u8 q9, d20, #8 \n" /* B */ \
+ "vsri.16 q0, q8, #5 \n" /* RG */ \
+ "vsri.16 q0, q9, #11 \n" /* RGB */
+
+void I422ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READYUV422 YUVTORGB
+ "subs %4, %4, #8 \n" ARGBTORGB565
+ "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565.
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_rgb565), // %3
+ "+r"(width) // %4
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+#define ARGBTOARGB1555 \
+ "vshll.u8 q0, d23, #8 \n" /* A */ \
+ "vshll.u8 q8, d22, #8 \n" /* R */ \
+ "vshll.u8 q9, d21, #8 \n" /* G */ \
+ "vshll.u8 q10, d20, #8 \n" /* B */ \
+ "vsri.16 q0, q8, #1 \n" /* AR */ \
+ "vsri.16 q0, q9, #6 \n" /* ARG */ \
+ "vsri.16 q0, q10, #11 \n" /* ARGB */
+
+void I422ToARGB1555Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READYUV422 YUVTORGB
+ "subs %4, %4, #8 \n"
+ "vmov.u8 d23, #255 \n" ARGBTOARGB1555
+ "vst1.8 {q0}, [%3]! \n" // store 8 pixels
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_argb1555), // %3
+ "+r"(width) // %4
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+#define ARGBTOARGB4444 \
+ "vshr.u8 d20, d20, #4 \n" /* B */ \
+ "vbic.32 d21, d21, d4 \n" /* G */ \
+ "vshr.u8 d22, d22, #4 \n" /* R */ \
+ "vbic.32 d23, d23, d4 \n" /* A */ \
+ "vorr d0, d20, d21 \n" /* BG */ \
+ "vorr d1, d22, d23 \n" /* RA */ \
+ "vzip.u8 d0, d1 \n" /* BGRA */
+
+void I422ToARGB4444Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "vmov.u8 d4, #0x0f \n" // vbic bits to clear
+ "1: \n"
+
+ READYUV422 YUVTORGB
+ "subs %4, %4, #8 \n"
+ "vmov.u8 d23, #255 \n" ARGBTOARGB4444
+ "vst1.8 {q0}, [%3]! \n" // store 8 pixels
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_argb4444), // %3
+ "+r"(width) // %4
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
+ "1: \n" READYUV400 YUVTORGB
+ "subs %2, %2, #8 \n"
+ "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : [kUVToRB] "r"(&kYuvI601Constants.kUVToRB),
+ [kUVToG] "r"(&kYuvI601Constants.kUVToG),
+ [kUVBiasBGR] "r"(&kYuvI601Constants.kUVBiasBGR),
+ [kYToRgb] "r"(&kYuvI601Constants.kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "vmov.u8 d23, #255 \n"
+ "1: \n"
+ "vld1.8 {d20}, [%0]! \n"
+ "vmov d21, d20 \n"
+ "vmov d22, d20 \n"
+ "subs %2, %2, #8 \n"
+ "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d20", "d21", "d22", "d23");
+}
+
+void NV12ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
+ "1: \n" READNV12 YUVTORGB
+ "subs %3, %3, #8 \n"
+ "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9",
+ "q10", "q11", "q12", "q13", "q14", "q15");
+}
+
+void NV21ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
+ "1: \n" READNV21 YUVTORGB
+ "subs %3, %3, #8 \n"
+ "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_vu), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9",
+ "q10", "q11", "q12", "q13", "q14", "q15");
+}
+
+void NV12ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+
+ YUVTORGB_SETUP
+
+ "1: \n"
+
+ READNV12 YUVTORGB
+ "subs %3, %3, #8 \n"
+ "vst3.8 {d20, d21, d22}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_rgb24), // %2
+ "+r"(width) // %3
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+void NV21ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+
+ YUVTORGB_SETUP
+
+ "1: \n"
+
+ READNV21 YUVTORGB
+ "subs %3, %3, #8 \n"
+ "vst3.8 {d20, d21, d22}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_vu), // %1
+ "+r"(dst_rgb24), // %2
+ "+r"(width) // %3
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+void NV12ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READNV12 YUVTORGB
+ "subs %3, %3, #8 \n" ARGBTORGB565
+ "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_rgb565), // %2
+ "+r"(width) // %3
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15");
+}
+
+void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
+ "1: \n" READYUY2 YUVTORGB
+ "subs %2, %2, #8 \n"
+ "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9",
+ "q10", "q11", "q12", "q13", "q14", "q15");
+}
+
+void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(YUVTORGB_SETUP
+ "vmov.u8 d23, #255 \n"
+ "1: \n" READUYVY YUVTORGB
+ "subs %2, %2, #8 \n"
+ "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9",
+ "q10", "q11", "q12", "q13", "q14", "q15");
+}
+
+// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
+void SplitUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
+ "subs %3, %3, #16 \n" // 16 processed per loop
+ "vst1.8 {q0}, [%1]! \n" // store U
+ "vst1.8 {q1}, [%2]! \n" // store V
+ "bgt 1b \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3 // Output registers
+ : // Input registers
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
+
+// Reads 16 U's and V's and writes out 16 pairs of UV.
+void MergeUVRow_NEON(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load U
+ "vld1.8 {q1}, [%1]! \n" // load V
+ "subs %3, %3, #16 \n" // 16 processed per loop
+ "vst2.8 {q0, q1}, [%2]! \n" // store 16 pairs of UV
+ "bgt 1b \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3 // Output registers
+ : // Input registers
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
+
+// Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b.
+void SplitRGBRow_NEON(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB
+ "vld3.8 {d1, d3, d5}, [%0]! \n" // next 8 RGB
+ "subs %4, %4, #16 \n" // 16 processed per loop
+ "vst1.8 {q0}, [%1]! \n" // store R
+ "vst1.8 {q1}, [%2]! \n" // store G
+ "vst1.8 {q2}, [%3]! \n" // store B
+ "bgt 1b \n"
+ : "+r"(src_rgb), // %0
+ "+r"(dst_r), // %1
+ "+r"(dst_g), // %2
+ "+r"(dst_b), // %3
+ "+r"(width) // %4
+ : // Input registers
+ : "cc", "memory", "d0", "d1", "d2" // Clobber List
+ );
+}
+
+// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time
+void MergeRGBRow_NEON(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load R
+ "vld1.8 {q1}, [%1]! \n" // load G
+ "vld1.8 {q2}, [%2]! \n" // load B
+ "subs %4, %4, #16 \n" // 16 processed per loop
+ "vst3.8 {d0, d2, d4}, [%3]! \n" // store 8 RGB
+ "vst3.8 {d1, d3, d5}, [%3]! \n" // next 8 RGB
+ "bgt 1b \n"
+ : "+r"(src_r), // %0
+ "+r"(src_g), // %1
+ "+r"(src_b), // %2
+ "+r"(dst_rgb), // %3
+ "+r"(width) // %4
+ : // Input registers
+ : "cc", "memory", "q0", "q1", "q2" // Clobber List
+ );
+}
+
+// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15.
+void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32
+ "subs %2, %2, #32 \n" // 32 processed per loop
+ "vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2 // Output registers
+ : // Input registers
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
+
+// SetRow writes 'width' bytes using an 8 bit value repeated.
+void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) {
+ asm volatile(
+ "vdup.8 q0, %2 \n" // duplicate 16 bytes
+ "1: \n"
+ "subs %1, %1, #16 \n" // 16 bytes per loop
+ "vst1.8 {q0}, [%0]! \n" // store
+ "bgt 1b \n"
+ : "+r"(dst), // %0
+ "+r"(width) // %1
+ : "r"(v8) // %2
+ : "cc", "memory", "q0");
+}
+
+// ARGBSetRow writes 'width' pixels using an 32 bit value repeated.
+void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
+ asm volatile(
+ "vdup.u32 q0, %2 \n" // duplicate 4 ints
+ "1: \n"
+ "subs %1, %1, #4 \n" // 4 pixels per loop
+ "vst1.8 {q0}, [%0]! \n" // store
+ "bgt 1b \n"
+ : "+r"(dst), // %0
+ "+r"(width) // %1
+ : "r"(v32) // %2
+ : "cc", "memory", "q0");
+}
+
+void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ // Start at end of source row.
+ "mov r3, #-16 \n"
+ "add %0, %0, %2 \n"
+ "sub %0, #16 \n"
+
+ "1: \n"
+ "vld1.8 {q0}, [%0], r3 \n" // src -= 16
+ "subs %2, #16 \n" // 16 pixels per loop.
+ "vrev64.8 q0, q0 \n"
+ "vst1.8 {d1}, [%1]! \n" // dst += 16
+ "vst1.8 {d0}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "r3", "q0");
+}
+
+void MirrorUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ // Start at end of source row.
+ "mov r12, #-16 \n"
+ "add %0, %0, %3, lsl #1 \n"
+ "sub %0, #16 \n"
+
+ "1: \n"
+ "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16
+ "subs %3, #8 \n" // 8 pixels per loop.
+ "vrev64.8 q0, q0 \n"
+ "vst1.8 {d0}, [%1]! \n" // dst += 8
+ "vst1.8 {d1}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "r12", "q0");
+}
+
+void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ // Start at end of source row.
+ "mov r3, #-16 \n"
+ "add %0, %0, %2, lsl #2 \n"
+ "sub %0, #16 \n"
+
+ "1: \n"
+ "vld1.8 {q0}, [%0], r3 \n" // src -= 16
+ "subs %2, #4 \n" // 4 pixels per loop.
+ "vrev64.32 q0, q0 \n"
+ "vst1.8 {d1}, [%1]! \n" // dst += 16
+ "vst1.8 {d0}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "r3", "q0");
+}
+
+void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "vmov.u8 d4, #255 \n" // Alpha
+ "1: \n"
+ "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
+ "bgt 1b \n"
+ : "+r"(src_rgb24), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ );
+}
+
+void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "vmov.u8 d4, #255 \n" // Alpha
+ "1: \n"
+ "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vswp.u8 d1, d3 \n" // swap R, B
+ "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
+ "bgt 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ );
+}
+
+void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
+ asm volatile(
+ "1: \n"
+ "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vswp.u8 d1, d3 \n" // swap R, B
+ "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of
+ // RGB24.
+ "bgt 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_rgb24), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d1", "d2", "d3" // Clobber List
+ );
+}
+
+#define RGB565TOARGB \
+ "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \
+ "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \
+ "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \
+ "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \
+ "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
+ "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
+ "vorr.u8 d0, d0, d4 \n" /* B */ \
+ "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \
+ "vorr.u8 d2, d1, d5 \n" /* R */ \
+ "vorr.u8 d1, d4, d6 \n" /* G */
+
+void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "vmov.u8 d3, #255 \n" // Alpha
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ RGB565TOARGB
+ "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
+ "bgt 1b \n"
+ : "+r"(src_rgb565), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+
+#define ARGB1555TOARGB \
+ "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \
+ "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \
+ "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \
+ "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \
+ "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \
+ "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \
+ "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \
+ "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \
+ "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \
+ "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \
+ "vorr.u8 q1, q1, q3 \n" /* R,A */ \
+ "vorr.u8 q0, q0, q2 \n" /* B,G */
+
+// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
+#define RGB555TOARGB \
+ "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \
+ "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \
+ "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \
+ "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \
+ "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
+ "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
+ "vorr.u8 d0, d0, d4 \n" /* B */ \
+ "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \
+ "vorr.u8 d2, d1, d5 \n" /* R */ \
+ "vorr.u8 d1, d4, d6 \n" /* G */
+
+void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "vmov.u8 d3, #255 \n" // Alpha
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ ARGB1555TOARGB
+ "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
+ "bgt 1b \n"
+ : "+r"(src_argb1555), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+
+#define ARGB4444TOARGB \
+ "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \
+ "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \
+ "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \
+ "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \
+ "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \
+ "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \
+ "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \
+ "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */
+
+void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "vmov.u8 d3, #255 \n" // Alpha
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ ARGB4444TOARGB
+ "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
+ "bgt 1b \n"
+ : "+r"(src_argb4444), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2" // Clobber List
+ );
+}
+
+void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb24,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of
+ // RGB24.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_rgb24), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ );
+}
+
+void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
+ asm volatile(
+ "1: \n"
+ "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vswp.u8 d1, d3 \n" // swap R, B
+ "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_raw), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
+ );
+}
+
+void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+ asm volatile(
+ "1: \n"
+ "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
+ "subs %2, %2, #16 \n" // 16 processed per loop.
+ "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y.
+ "bgt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
+
+void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+ asm volatile(
+ "1: \n"
+ "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY.
+ "subs %2, %2, #16 \n" // 16 processed per loop.
+ "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y.
+ "bgt 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
+
+void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
+ "subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
+ "vst1.8 {d1}, [%1]! \n" // store 8 U.
+ "vst1.8 {d3}, [%2]! \n" // store 8 V.
+ "bgt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
+ );
+}
+
+void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
+ "subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
+ "vst1.8 {d0}, [%1]! \n" // store 8 U.
+ "vst1.8 {d2}, [%2]! \n" // store 8 V.
+ "bgt 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
+ );
+}
+
+void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "add %1, %0, %1 \n" // stride + src_yuy2
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
+ "subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
+ "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2.
+ "vrhadd.u8 d1, d1, d5 \n" // average rows of U
+ "vrhadd.u8 d3, d3, d7 \n" // average rows of V
+ "vst1.8 {d1}, [%2]! \n" // store 8 U.
+ "vst1.8 {d3}, [%3]! \n" // store 8 V.
+ "bgt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(stride_yuy2), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
+ "d7" // Clobber List
+ );
+}
+
+void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
+ int stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "add %1, %0, %1 \n" // stride + src_uyvy
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
+ "subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
+ "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY.
+ "vrhadd.u8 d0, d0, d4 \n" // average rows of U
+ "vrhadd.u8 d2, d2, d6 \n" // average rows of V
+ "vst1.8 {d0}, [%2]! \n" // store 8 U.
+ "vst1.8 {d2}, [%3]! \n" // store 8 V.
+ "bgt 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(stride_uyvy), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
+ "d7" // Clobber List
+ );
+}
+
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+void ARGBShuffleRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width) {
+ asm volatile(
+ "vld1.8 {q2}, [%3] \n" // shuffler
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 4 pixels.
+ "subs %2, %2, #4 \n" // 4 processed per loop
+ "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels
+ "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels
+ "vst1.8 {q1}, [%1]! \n" // store 4.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(shuffler) // %3
+ : "cc", "memory", "q0", "q1", "q2" // Clobber List
+ );
+}
+
+void I422ToYUY2Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys
+ "vld1.8 {d1}, [%1]! \n" // load 8 Us
+ "vld1.8 {d3}, [%2]! \n" // load 8 Vs
+ "subs %4, %4, #16 \n" // 16 pixels
+ "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels.
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_yuy2), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3");
+}
+
+void I422ToUYVYRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys
+ "vld1.8 {d0}, [%1]! \n" // load 8 Us
+ "vld1.8 {d2}, [%2]! \n" // load 8 Vs
+ "subs %4, %4, #16 \n" // 16 pixels
+ "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels.
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_uyvy), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3");
+}
+
+void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb565,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ ARGBTORGB565
+ "vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_rgb565), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q8", "q9", "q10", "q11");
+}
+
+void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
+ int width) {
+ asm volatile(
+ "vdup.32 d2, %2 \n" // dither4
+ "1: \n"
+ "vld4.8 {d20, d21, d22, d23}, [%1]! \n" // load 8 pixels of ARGB.
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vqadd.u8 d20, d20, d2 \n"
+ "vqadd.u8 d21, d21, d2 \n"
+ "vqadd.u8 d22, d22, d2 \n" // add for dither
+ ARGBTORGB565
+ "vst1.8 {q0}, [%0]! \n" // store 8 RGB565.
+ "bgt 1b \n"
+ : "+r"(dst_rgb) // %0
+ : "r"(src_argb), // %1
+ "r"(dither4), // %2
+ "r"(width) // %3
+ : "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11");
+}
+
+void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb1555,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ ARGBTOARGB1555
+ "vst1.8 {q0}, [%1]! \n" // store 8 ARGB1555.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb1555), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q8", "q9", "q10", "q11");
+}
+
+void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb4444,
+ int width) {
+ asm volatile(
+ "vmov.u8 d4, #0x0f \n" // bits to clear with
+ // vbic.
+ "1: \n"
+ "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ ARGBTOARGB4444
+ "vst1.8 {q0}, [%1]! \n" // store 8 ARGB4444.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb4444), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q8", "q9", "q10", "q11");
+}
+
+void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
+ "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
+ "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
+ "vmov.u8 d27, #16 \n" // Add 16 constant
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmull.u8 q2, d0, d24 \n" // B
+ "vmlal.u8 q2, d1, d25 \n" // G
+ "vmlal.u8 q2, d2, d26 \n" // R
+ "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
+ "vqadd.u8 d0, d27 \n"
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q12", "q13");
+}
+
+void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels
+ "subs %2, %2, #16 \n" // 16 processed per loop
+ "vst1.8 {q3}, [%1]! \n" // store 16 A's.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_a), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+
+void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
+ "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
+ "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmull.u8 q2, d0, d24 \n" // B
+ "vmlal.u8 q2, d1, d25 \n" // G
+ "vmlal.u8 q2, d2, d26 \n" // R
+ "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q12", "q13");
+}
+
+// 8x1 pixels.
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "vmov.u8 d24, #112 \n" // UB / VR 0.875
+ // coefficient
+ "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient
+ "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient
+ "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient
+ "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vmull.u8 q2, d0, d24 \n" // B
+ "vmlsl.u8 q2, d1, d25 \n" // G
+ "vmlsl.u8 q2, d2, d26 \n" // R
+ "vadd.u16 q2, q2, q15 \n" // +128 -> unsigned
+
+ "vmull.u8 q3, d2, d24 \n" // R
+ "vmlsl.u8 q3, d1, d28 \n" // G
+ "vmlsl.u8 q3, d0, d27 \n" // B
+ "vadd.u16 q3, q3, q15 \n" // +128 -> unsigned
+
+ "vqshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit U
+ "vqshrn.u16 d1, q3, #8 \n" // 16 bit to 8 bit V
+
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14",
+ "q15");
+}
+
+// clang-format off
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
+#define RGBTOUV(QB, QG, QR) \
+ "vmul.s16 q8, " #QB ", q10 \n" /* B */ \
+ "vmls.s16 q8, " #QG ", q11 \n" /* G */ \
+ "vmls.s16 q8, " #QR ", q12 \n" /* R */ \
+ "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
+ "vmul.s16 q9, " #QR ", q10 \n" /* R */ \
+ "vmls.s16 q9, " #QG ", q14 \n" /* G */ \
+ "vmls.s16 q9, " #QB ", q13 \n" /* B */ \
+ "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
+ "vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \
+ "vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */
+// clang-format on
+
+// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
+void ARGBToUVRow_NEON(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile (
+ "add %1, %0, %1 \n" // src_stride + src_argb
+ "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
+ "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
+ "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
+ "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
+ "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
+ "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
+ "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
+ "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
+ "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
+
+ "vrshr.u16 q0, q0, #1 \n" // 2x average
+ "vrshr.u16 q1, q1, #1 \n"
+ "vrshr.u16 q2, q2, #1 \n"
+
+ "subs %4, %4, #16 \n" // 32 processed per loop.
+ RGBTOUV(q0, q1, q2)
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_stride_argb), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+// TODO(fbarchard): Subsample match C code.
+void ARGBToUVJRow_NEON(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile (
+ "add %1, %0, %1 \n" // src_stride + src_argb
+ "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
+ "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
+ "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
+ "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
+ "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
+ "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
+ "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
+ "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
+ "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
+
+ "vrshr.u16 q0, q0, #1 \n" // 2x average
+ "vrshr.u16 q1, q1, #1 \n"
+ "vrshr.u16 q2, q2, #1 \n"
+
+ "subs %4, %4, #16 \n" // 32 processed per loop.
+ RGBTOUV(q0, q1, q2)
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_stride_argb), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+void BGRAToUVRow_NEON(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile (
+ "add %1, %0, %1 \n" // src_stride + src_bgra
+ "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
+ "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
+ "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
+ "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
+ "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels.
+ "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts.
+ "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts.
+ "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels.
+ "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels.
+ "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts.
+ "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts.
+
+ "vrshr.u16 q1, q1, #1 \n" // 2x average
+ "vrshr.u16 q2, q2, #1 \n"
+ "vrshr.u16 q3, q3, #1 \n"
+
+ "subs %4, %4, #16 \n" // 32 processed per loop.
+ RGBTOUV(q3, q2, q1)
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_bgra), // %0
+ "+r"(src_stride_bgra), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+void ABGRToUVRow_NEON(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile (
+ "add %1, %0, %1 \n" // src_stride + src_abgr
+ "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
+ "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
+ "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
+ "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
+ "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
+ "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
+ "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
+ "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
+ "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
+
+ "vrshr.u16 q0, q0, #1 \n" // 2x average
+ "vrshr.u16 q1, q1, #1 \n"
+ "vrshr.u16 q2, q2, #1 \n"
+
+ "subs %4, %4, #16 \n" // 32 processed per loop.
+ RGBTOUV(q2, q1, q0)
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(src_stride_abgr), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+void RGBAToUVRow_NEON(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile (
+ "add %1, %0, %1 \n" // src_stride + src_rgba
+ "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
+ "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
+ "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
+ "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
+ "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels.
+ "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts.
+ "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels.
+ "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels.
+ "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts.
+
+ "vrshr.u16 q0, q0, #1 \n" // 2x average
+ "vrshr.u16 q1, q1, #1 \n"
+ "vrshr.u16 q2, q2, #1 \n"
+
+ "subs %4, %4, #16 \n" // 32 processed per loop.
+ RGBTOUV(q0, q1, q2)
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_rgba), // %0
+ "+r"(src_stride_rgba), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile (
+ "add %1, %0, %1 \n" // src_stride + src_rgb24
+ "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
+ "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
+ "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
+ "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
+ "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
+ "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels.
+ "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
+ "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels.
+ "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels.
+ "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
+
+ "vrshr.u16 q0, q0, #1 \n" // 2x average
+ "vrshr.u16 q1, q1, #1 \n"
+ "vrshr.u16 q2, q2, #1 \n"
+
+ "subs %4, %4, #16 \n" // 32 processed per loop.
+ RGBTOUV(q0, q1, q2)
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_rgb24), // %0
+ "+r"(src_stride_rgb24), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+void RAWToUVRow_NEON(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile (
+ "add %1, %0, %1 \n" // src_stride + src_raw
+ "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
+ "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
+ "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
+ "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
+ "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
+ "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels.
+ "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
+ "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels.
+ "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels.
+ "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
+
+ "vrshr.u16 q0, q0, #1 \n" // 2x average
+ "vrshr.u16 q1, q1, #1 \n"
+ "vrshr.u16 q2, q2, #1 \n"
+
+ "subs %4, %4, #16 \n" // 32 processed per loop.
+ RGBTOUV(q2, q1, q0)
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(src_stride_raw), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
+void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "add %1, %0, %1 \n" // src_stride + src_argb
+ "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875
+ // coefficient
+ "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
+ "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
+ "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
+ "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
+ RGB565TOARGB
+ "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels.
+ RGB565TOARGB
+ "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+
+ "vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels.
+ RGB565TOARGB
+ "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels.
+ RGB565TOARGB
+ "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+
+ "vrshr.u16 q4, q4, #1 \n" // 2x average
+ "vrshr.u16 q5, q5, #1 \n"
+ "vrshr.u16 q6, q6, #1 \n"
+
+ "subs %4, %4, #16 \n" // 16 processed per loop.
+ "vmul.s16 q8, q4, q10 \n" // B
+ "vmls.s16 q8, q5, q11 \n" // G
+ "vmls.s16 q8, q6, q12 \n" // R
+ "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
+ "vmul.s16 q9, q6, q10 \n" // R
+ "vmls.s16 q9, q5, q14 \n" // G
+ "vmls.s16 q9, q4, q13 \n" // B
+ "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
+ "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
+ "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_rgb565), // %0
+ "+r"(src_stride_rgb565), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
+ "q9", "q10", "q11", "q12", "q13", "q14", "q15");
+}
+
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
+void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "add %1, %0, %1 \n" // src_stride + src_argb
+ "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875
+ // coefficient
+ "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
+ "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
+ "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
+ "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
+ RGB555TOARGB
+ "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels.
+ RGB555TOARGB
+ "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+
+ "vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels.
+ RGB555TOARGB
+ "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels.
+ RGB555TOARGB
+ "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+
+ "vrshr.u16 q4, q4, #1 \n" // 2x average
+ "vrshr.u16 q5, q5, #1 \n"
+ "vrshr.u16 q6, q6, #1 \n"
+
+ "subs %4, %4, #16 \n" // 16 processed per loop.
+ "vmul.s16 q8, q4, q10 \n" // B
+ "vmls.s16 q8, q5, q11 \n" // G
+ "vmls.s16 q8, q6, q12 \n" // R
+ "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
+ "vmul.s16 q9, q6, q10 \n" // R
+ "vmls.s16 q9, q5, q14 \n" // G
+ "vmls.s16 q9, q4, q13 \n" // B
+ "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
+ "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
+ "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_argb1555), // %0
+ "+r"(src_stride_argb1555), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
+ "q9", "q10", "q11", "q12", "q13", "q14", "q15");
+}
+
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
+void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "add %1, %0, %1 \n" // src_stride + src_argb
+ "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875
+ // coefficient
+ "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
+ "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
+ "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
+ "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
+ ARGB4444TOARGB
+ "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels.
+ ARGB4444TOARGB
+ "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+
+ "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels.
+ ARGB4444TOARGB
+ "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
+ "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels.
+ ARGB4444TOARGB
+ "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
+ "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
+ "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
+
+ "vrshr.u16 q4, q4, #1 \n" // 2x average
+ "vrshr.u16 q5, q5, #1 \n"
+ "vrshr.u16 q6, q6, #1 \n"
+
+ "subs %4, %4, #16 \n" // 16 processed per loop.
+ "vmul.s16 q8, q4, q10 \n" // B
+ "vmls.s16 q8, q5, q11 \n" // G
+ "vmls.s16 q8, q6, q12 \n" // R
+ "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
+ "vmul.s16 q9, q6, q10 \n" // R
+ "vmls.s16 q9, q5, q14 \n" // G
+ "vmls.s16 q9, q4, q13 \n" // B
+ "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
+ "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
+ "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_argb4444), // %0
+ "+r"(src_stride_argb4444), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
+ "q9", "q10", "q11", "q12", "q13", "q14", "q15");
+}
+
+void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
+ "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
+ "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
+ "vmov.u8 d27, #16 \n" // Add 16 constant
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ RGB565TOARGB
+ "vmull.u8 q2, d0, d24 \n" // B
+ "vmlal.u8 q2, d1, d25 \n" // G
+ "vmlal.u8 q2, d2, d26 \n" // R
+ "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
+ "vqadd.u8 d0, d27 \n"
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "bgt 1b \n"
+ : "+r"(src_rgb565), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
+}
+
+void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_y,
+ int width) {
+ asm volatile(
+ "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
+ "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
+ "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
+ "vmov.u8 d27, #16 \n" // Add 16 constant
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ ARGB1555TOARGB
+ "vmull.u8 q2, d0, d24 \n" // B
+ "vmlal.u8 q2, d1, d25 \n" // G
+ "vmlal.u8 q2, d2, d26 \n" // R
+ "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
+ "vqadd.u8 d0, d27 \n"
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "bgt 1b \n"
+ : "+r"(src_argb1555), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
+}
+
+void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_y,
+ int width) {
+ asm volatile(
+ "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
+ "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
+ "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
+ "vmov.u8 d27, #16 \n" // Add 16 constant
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ ARGB4444TOARGB
+ "vmull.u8 q2, d0, d24 \n" // B
+ "vmlal.u8 q2, d1, d25 \n" // G
+ "vmlal.u8 q2, d2, d26 \n" // R
+ "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
+ "vqadd.u8 d0, d27 \n"
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "bgt 1b \n"
+ : "+r"(src_argb4444), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
+}
+
+void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
+ "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
+ "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
+ "vmov.u8 d7, #16 \n" // Add 16 constant
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmull.u8 q8, d1, d4 \n" // R
+ "vmlal.u8 q8, d2, d5 \n" // G
+ "vmlal.u8 q8, d3, d6 \n" // B
+ "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
+ "vqadd.u8 d0, d7 \n"
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "bgt 1b \n"
+ : "+r"(src_bgra), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
+}
+
+void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
+ "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
+ "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
+ "vmov.u8 d7, #16 \n" // Add 16 constant
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmull.u8 q8, d0, d4 \n" // R
+ "vmlal.u8 q8, d1, d5 \n" // G
+ "vmlal.u8 q8, d2, d6 \n" // B
+ "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
+ "vqadd.u8 d0, d7 \n"
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "bgt 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
+}
+
+void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
+ "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
+ "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient
+ "vmov.u8 d7, #16 \n" // Add 16 constant
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmull.u8 q8, d1, d4 \n" // B
+ "vmlal.u8 q8, d2, d5 \n" // G
+ "vmlal.u8 q8, d3, d6 \n" // R
+ "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
+ "vqadd.u8 d0, d7 \n"
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "bgt 1b \n"
+ : "+r"(src_rgba), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
+}
+
+void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
+ "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
+ "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient
+ "vmov.u8 d7, #16 \n" // Add 16 constant
+ "1: \n"
+ "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmull.u8 q8, d0, d4 \n" // B
+ "vmlal.u8 q8, d1, d5 \n" // G
+ "vmlal.u8 q8, d2, d6 \n" // R
+ "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
+ "vqadd.u8 d0, d7 \n"
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "bgt 1b \n"
+ : "+r"(src_rgb24), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
+}
+
+void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
+ "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
+ "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
+ "vmov.u8 d7, #16 \n" // Add 16 constant
+ "1: \n"
+ "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmull.u8 q8, d0, d4 \n" // B
+ "vmlal.u8 q8, d1, d5 \n" // G
+ "vmlal.u8 q8, d2, d6 \n" // R
+ "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
+ "vqadd.u8 d0, d7 \n"
+ "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
+ "bgt 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
+}
+
+// Bilinear filter 16x2 -> 16x1
+void InterpolateRow_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ int y1_fraction = source_y_fraction;
+ asm volatile(
+ "cmp %4, #0 \n"
+ "beq 100f \n"
+ "add %2, %1 \n"
+ "cmp %4, #128 \n"
+ "beq 50f \n"
+
+ "vdup.8 d5, %4 \n"
+ "rsb %4, #256 \n"
+ "vdup.8 d4, %4 \n"
+ // General purpose row blend.
+ "1: \n"
+ "vld1.8 {q0}, [%1]! \n"
+ "vld1.8 {q1}, [%2]! \n"
+ "subs %3, %3, #16 \n"
+ "vmull.u8 q13, d0, d4 \n"
+ "vmull.u8 q14, d1, d4 \n"
+ "vmlal.u8 q13, d2, d5 \n"
+ "vmlal.u8 q14, d3, d5 \n"
+ "vrshrn.u16 d0, q13, #8 \n"
+ "vrshrn.u16 d1, q14, #8 \n"
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 1b \n"
+ "b 99f \n"
+
+ // Blend 50 / 50.
+ "50: \n"
+ "vld1.8 {q0}, [%1]! \n"
+ "vld1.8 {q1}, [%2]! \n"
+ "subs %3, %3, #16 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 50b \n"
+ "b 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ "100: \n"
+ "vld1.8 {q0}, [%1]! \n"
+ "subs %3, %3, #16 \n"
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 100b \n"
+
+ "99: \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(src_stride), // %2
+ "+r"(dst_width), // %3
+ "+r"(y1_fraction) // %4
+ :
+ : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14");
+}
+
+// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
+void ARGBBlendRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "subs %3, #8 \n"
+ "blt 89f \n"
+ // Blend 8 pixels.
+ "8: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB0.
+ "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 pixels of ARGB1.
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vmull.u8 q10, d4, d3 \n" // db * a
+ "vmull.u8 q11, d5, d3 \n" // dg * a
+ "vmull.u8 q12, d6, d3 \n" // dr * a
+ "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8
+ "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8
+ "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8
+ "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256
+ "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256
+ "vqadd.u8 q0, q0, q2 \n" // + sbg
+ "vqadd.u8 d2, d2, d6 \n" // + sr
+ "vmov.u8 d3, #255 \n" // a = 255
+ "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 pixels of ARGB.
+ "bge 8b \n"
+
+ "89: \n"
+ "adds %3, #8-1 \n"
+ "blt 99f \n"
+
+ // Blend 1 pixels.
+ "1: \n"
+ "vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n" // load 1 pixel ARGB0.
+ "vld4.8 {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n" // load 1 pixel ARGB1.
+ "subs %3, %3, #1 \n" // 1 processed per loop.
+ "vmull.u8 q10, d4, d3 \n" // db * a
+ "vmull.u8 q11, d5, d3 \n" // dg * a
+ "vmull.u8 q12, d6, d3 \n" // dr * a
+ "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8
+ "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8
+ "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8
+ "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256
+ "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256
+ "vqadd.u8 q0, q0, q2 \n" // + sbg
+ "vqadd.u8 d2, d2, d6 \n" // + sr
+ "vmov.u8 d3, #255 \n" // a = 255
+ "vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n" // store 1 pixel.
+ "bge 1b \n"
+
+ "99: \n"
+
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12");
+}
+
+// Attenuate 8 pixels at a time.
+void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // Attenuate 8 pixels.
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmull.u8 q10, d0, d3 \n" // b * a
+ "vmull.u8 q11, d1, d3 \n" // g * a
+ "vmull.u8 q12, d2, d3 \n" // r * a
+ "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8
+ "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8
+ "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8
+ "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q10", "q11", "q12");
+}
+
+// Quantize 8 ARGB pixels (32 bytes).
+// dst = (dst * scale >> 16) * interval_size + interval_offset;
+void ARGBQuantizeRow_NEON(uint8_t* dst_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int width) {
+ asm volatile(
+ "vdup.u16 q8, %2 \n"
+ "vshr.u16 q8, q8, #1 \n" // scale >>= 1
+ "vdup.u16 q9, %3 \n" // interval multiply.
+ "vdup.u16 q10, %4 \n" // interval add
+
+ // 8 pixel loop.
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB.
+ "subs %1, %1, #8 \n" // 8 processed per loop.
+ "vmovl.u8 q0, d0 \n" // b (0 .. 255)
+ "vmovl.u8 q1, d2 \n"
+ "vmovl.u8 q2, d4 \n"
+ "vqdmulh.s16 q0, q0, q8 \n" // b * scale
+ "vqdmulh.s16 q1, q1, q8 \n" // g
+ "vqdmulh.s16 q2, q2, q8 \n" // r
+ "vmul.u16 q0, q0, q9 \n" // b * interval_size
+ "vmul.u16 q1, q1, q9 \n" // g
+ "vmul.u16 q2, q2, q9 \n" // r
+ "vadd.u16 q0, q0, q10 \n" // b + interval_offset
+ "vadd.u16 q1, q1, q10 \n" // g
+ "vadd.u16 q2, q2, q10 \n" // r
+ "vqmovn.u16 d0, q0 \n"
+ "vqmovn.u16 d2, q1 \n"
+ "vqmovn.u16 d4, q2 \n"
+ "vst4.8 {d0, d2, d4, d6}, [%0]! \n" // store 8 pixels of ARGB.
+ "bgt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
+ : "r"(scale), // %2
+ "r"(interval_size), // %3
+ "r"(interval_offset) // %4
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10");
+}
+
+// Shade 8 pixels at a time by specified value.
+// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8.
+// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
+void ARGBShadeRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value) {
+ asm volatile(
+ "vdup.u32 q0, %3 \n" // duplicate scale value.
+ "vzip.u8 d0, d1 \n" // d0 aarrggbb.
+ "vshr.u16 q0, q0, #1 \n" // scale / 2.
+
+ // 8 pixel loop.
+ "1: \n"
+ "vld4.8 {d20, d22, d24, d26}, [%0]! \n" // load 8 pixels of ARGB.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmovl.u8 q10, d20 \n" // b (0 .. 255)
+ "vmovl.u8 q11, d22 \n"
+ "vmovl.u8 q12, d24 \n"
+ "vmovl.u8 q13, d26 \n"
+ "vqrdmulh.s16 q10, q10, d0[0] \n" // b * scale * 2
+ "vqrdmulh.s16 q11, q11, d0[1] \n" // g
+ "vqrdmulh.s16 q12, q12, d0[2] \n" // r
+ "vqrdmulh.s16 q13, q13, d0[3] \n" // a
+ "vqmovn.u16 d20, q10 \n"
+ "vqmovn.u16 d22, q11 \n"
+ "vqmovn.u16 d24, q12 \n"
+ "vqmovn.u16 d26, q13 \n"
+ "vst4.8 {d20, d22, d24, d26}, [%1]! \n" // store 8 pixels of ARGB.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(value) // %3
+ : "cc", "memory", "q0", "q10", "q11", "q12", "q13");
+}
+
+// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
+// Similar to ARGBToYJ but stores ARGB.
+// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
+void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
+ "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
+ "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmull.u8 q2, d0, d24 \n" // B
+ "vmlal.u8 q2, d1, d25 \n" // G
+ "vmlal.u8 q2, d2, d26 \n" // R
+ "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit B
+ "vmov d1, d0 \n" // G
+ "vmov d2, d0 \n" // R
+ "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q12", "q13");
+}
+
+// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
+// b = (r * 35 + g * 68 + b * 17) >> 7
+// g = (r * 45 + g * 88 + b * 22) >> 7
+// r = (r * 50 + g * 98 + b * 24) >> 7
+void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) {
+ asm volatile(
+ "vmov.u8 d20, #17 \n" // BB coefficient
+ "vmov.u8 d21, #68 \n" // BG coefficient
+ "vmov.u8 d22, #35 \n" // BR coefficient
+ "vmov.u8 d24, #22 \n" // GB coefficient
+ "vmov.u8 d25, #88 \n" // GG coefficient
+ "vmov.u8 d26, #45 \n" // GR coefficient
+ "vmov.u8 d28, #24 \n" // BB coefficient
+ "vmov.u8 d29, #98 \n" // BG coefficient
+ "vmov.u8 d30, #50 \n" // BR coefficient
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0] \n" // load 8 ARGB pixels.
+ "subs %1, %1, #8 \n" // 8 processed per loop.
+ "vmull.u8 q2, d0, d20 \n" // B to Sepia B
+ "vmlal.u8 q2, d1, d21 \n" // G
+ "vmlal.u8 q2, d2, d22 \n" // R
+ "vmull.u8 q3, d0, d24 \n" // B to Sepia G
+ "vmlal.u8 q3, d1, d25 \n" // G
+ "vmlal.u8 q3, d2, d26 \n" // R
+ "vmull.u8 q8, d0, d28 \n" // B to Sepia R
+ "vmlal.u8 q8, d1, d29 \n" // G
+ "vmlal.u8 q8, d2, d30 \n" // R
+ "vqshrn.u16 d0, q2, #7 \n" // 16 bit to 8 bit B
+ "vqshrn.u16 d1, q3, #7 \n" // 16 bit to 8 bit G
+ "vqshrn.u16 d2, q8, #7 \n" // 16 bit to 8 bit R
+ "vst4.8 {d0, d1, d2, d3}, [%0]! \n" // store 8 ARGB pixels.
+ "bgt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12", "q13",
+ "q14", "q15");
+}
+
+// Tranform 8 ARGB pixels (32 bytes) with color matrix.
+// TODO(fbarchard): Was same as Sepia except matrix is provided. This function
+// needs to saturate. Consider doing a non-saturating version.
+void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
+ int width) {
+ asm volatile(
+ "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors.
+ "vmovl.s8 q0, d4 \n" // B,G coefficients s16.
+ "vmovl.s8 q1, d5 \n" // R,A coefficients s16.
+
+ "1: \n"
+ "vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit
+ "vmovl.u8 q9, d18 \n" // g
+ "vmovl.u8 q10, d20 \n" // r
+ "vmovl.u8 q11, d22 \n" // a
+ "vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B
+ "vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G
+ "vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R
+ "vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A
+ "vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B
+ "vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G
+ "vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R
+ "vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A
+ "vqadd.s16 q12, q12, q4 \n" // Accumulate B
+ "vqadd.s16 q13, q13, q5 \n" // Accumulate G
+ "vqadd.s16 q14, q14, q6 \n" // Accumulate R
+ "vqadd.s16 q15, q15, q7 \n" // Accumulate A
+ "vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B
+ "vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G
+ "vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R
+ "vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A
+ "vqadd.s16 q12, q12, q4 \n" // Accumulate B
+ "vqadd.s16 q13, q13, q5 \n" // Accumulate G
+ "vqadd.s16 q14, q14, q6 \n" // Accumulate R
+ "vqadd.s16 q15, q15, q7 \n" // Accumulate A
+ "vmul.s16 q4, q11, d0[3] \n" // B += A * Matrix B
+ "vmul.s16 q5, q11, d1[3] \n" // G += A * Matrix G
+ "vmul.s16 q6, q11, d2[3] \n" // R += A * Matrix R
+ "vmul.s16 q7, q11, d3[3] \n" // A += A * Matrix A
+ "vqadd.s16 q12, q12, q4 \n" // Accumulate B
+ "vqadd.s16 q13, q13, q5 \n" // Accumulate G
+ "vqadd.s16 q14, q14, q6 \n" // Accumulate R
+ "vqadd.s16 q15, q15, q7 \n" // Accumulate A
+ "vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B
+ "vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G
+ "vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R
+ "vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A
+ "vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(matrix_argb) // %3
+ : "cc", "memory", "q0", "q1", "q2", "q4", "q5", "q6", "q7", "q8", "q9",
+ "q10", "q11", "q12", "q13", "q14", "q15");
+}
+
+// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
+void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // 8 pixel loop.
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vmull.u8 q0, d0, d1 \n" // multiply B
+ "vmull.u8 q1, d2, d3 \n" // multiply G
+ "vmull.u8 q2, d4, d5 \n" // multiply R
+ "vmull.u8 q3, d6, d7 \n" // multiply A
+ "vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B
+ "vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G
+ "vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R
+ "vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A
+ "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "bgt 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3");
+}
+
+// Add 2 rows of ARGB pixels together, 8 pixels at a time.
+void ARGBAddRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // 8 pixel loop.
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vqadd.u8 q0, q0, q2 \n" // add B, G
+ "vqadd.u8 q1, q1, q3 \n" // add R, A
+ "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "bgt 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3");
+}
+
+// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
+void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // 8 pixel loop.
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
+ "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vqsub.u8 q0, q0, q2 \n" // subtract B, G
+ "vqsub.u8 q1, q1, q3 \n" // subtract R, A
+ "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "bgt 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3");
+}
+
+// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
+// A = 255
+// R = Sobel
+// G = Sobel
+// B = Sobel
+void SobelRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "vmov.u8 d3, #255 \n" // alpha
+ // 8 pixel loop.
+ "1: \n"
+ "vld1.8 {d0}, [%0]! \n" // load 8 sobelx.
+ "vld1.8 {d1}, [%1]! \n" // load 8 sobely.
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vqadd.u8 d0, d0, d1 \n" // add
+ "vmov.u8 d1, d0 \n"
+ "vmov.u8 d2, d0 \n"
+ "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "bgt 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1");
+}
+
+// Adds Sobel X and Sobel Y and stores Sobel into plane.
+void SobelToPlaneRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
+ int width) {
+ asm volatile(
+ // 16 pixel loop.
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load 16 sobelx.
+ "vld1.8 {q1}, [%1]! \n" // load 16 sobely.
+ "subs %3, %3, #16 \n" // 16 processed per loop.
+ "vqadd.u8 q0, q0, q1 \n" // add
+ "vst1.8 {q0}, [%2]! \n" // store 16 pixels.
+ "bgt 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_y), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1");
+}
+
+// Mixes Sobel X, Sobel Y and Sobel into ARGB.
+// A = 255
+// R = Sobel X
+// G = Sobel
+// B = Sobel Y
+void SobelXYRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "vmov.u8 d3, #255 \n" // alpha
+ // 8 pixel loop.
+ "1: \n"
+ "vld1.8 {d2}, [%0]! \n" // load 8 sobelx.
+ "vld1.8 {d0}, [%1]! \n" // load 8 sobely.
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vqadd.u8 d1, d0, d2 \n" // add
+ "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
+ "bgt 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1");
+}
+
+// SobelX as a matrix is
+// -1 0 1
+// -2 0 2
+// -1 0 1
+void SobelXRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {d0}, [%0],%5 \n" // top
+ "vld1.8 {d1}, [%0],%6 \n"
+ "vsubl.u8 q0, d0, d1 \n"
+ "vld1.8 {d2}, [%1],%5 \n" // center * 2
+ "vld1.8 {d3}, [%1],%6 \n"
+ "vsubl.u8 q1, d2, d3 \n"
+ "vadd.s16 q0, q0, q1 \n"
+ "vadd.s16 q0, q0, q1 \n"
+ "vld1.8 {d2}, [%2],%5 \n" // bottom
+ "vld1.8 {d3}, [%2],%6 \n"
+ "subs %4, %4, #8 \n" // 8 pixels
+ "vsubl.u8 q1, d2, d3 \n"
+ "vadd.s16 q0, q0, q1 \n"
+ "vabs.s16 q0, q0 \n"
+ "vqmovn.u16 d0, q0 \n"
+ "vst1.8 {d0}, [%3]! \n" // store 8 sobelx
+ "bgt 1b \n"
+ : "+r"(src_y0), // %0
+ "+r"(src_y1), // %1
+ "+r"(src_y2), // %2
+ "+r"(dst_sobelx), // %3
+ "+r"(width) // %4
+ : "r"(2), // %5
+ "r"(6) // %6
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
+
+// SobelY as a matrix is
+// -1 -2 -1
+// 0 0 0
+// 1 2 1
+void SobelYRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {d0}, [%0],%4 \n" // left
+ "vld1.8 {d1}, [%1],%4 \n"
+ "vsubl.u8 q0, d0, d1 \n"
+ "vld1.8 {d2}, [%0],%4 \n" // center * 2
+ "vld1.8 {d3}, [%1],%4 \n"
+ "vsubl.u8 q1, d2, d3 \n"
+ "vadd.s16 q0, q0, q1 \n"
+ "vadd.s16 q0, q0, q1 \n"
+ "vld1.8 {d2}, [%0],%5 \n" // right
+ "vld1.8 {d3}, [%1],%5 \n"
+ "subs %3, %3, #8 \n" // 8 pixels
+ "vsubl.u8 q1, d2, d3 \n"
+ "vadd.s16 q0, q0, q1 \n"
+ "vabs.s16 q0, q0 \n"
+ "vqmovn.u16 d0, q0 \n"
+ "vst1.8 {d0}, [%2]! \n" // store 8 sobely
+ "bgt 1b \n"
+ : "+r"(src_y0), // %0
+ "+r"(src_y1), // %1
+ "+r"(dst_sobely), // %2
+ "+r"(width) // %3
+ : "r"(1), // %4
+ "r"(6) // %5
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
+
+// %y passes a float as a scalar vector for vector * scalar multiply.
+// the regoster must be d0 to d15 and indexed with [0] or [1] to access
+// the float in the first or second float of the d-reg
+
+void HalfFloat1Row_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float /*unused*/,
+ int width) {
+ asm volatile(
+
+ "1: \n"
+ "vld1.8 {q1}, [%0]! \n" // load 8 shorts
+ "subs %2, %2, #8 \n" // 8 pixels per loop
+ "vmovl.u16 q2, d2 \n" // 8 int's
+ "vmovl.u16 q3, d3 \n"
+ "vcvt.f32.u32 q2, q2 \n" // 8 floats
+ "vcvt.f32.u32 q3, q3 \n"
+ "vmul.f32 q2, q2, %y3 \n" // adjust exponent
+ "vmul.f32 q3, q3, %y3 \n"
+ "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat
+ "vqshrn.u32 d3, q3, #13 \n"
+ "vst1.8 {q1}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "w"(1.9259299444e-34f) // %3
+ : "cc", "memory", "q1", "q2", "q3");
+}
+
+void HalfFloatRow_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ asm volatile(
+
+ "1: \n"
+ "vld1.8 {q1}, [%0]! \n" // load 8 shorts
+ "subs %2, %2, #8 \n" // 8 pixels per loop
+ "vmovl.u16 q2, d2 \n" // 8 int's
+ "vmovl.u16 q3, d3 \n"
+ "vcvt.f32.u32 q2, q2 \n" // 8 floats
+ "vcvt.f32.u32 q3, q3 \n"
+ "vmul.f32 q2, q2, %y3 \n" // adjust exponent
+ "vmul.f32 q3, q3, %y3 \n"
+ "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat
+ "vqshrn.u32 d3, q3, #13 \n"
+ "vst1.8 {q1}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "w"(scale * 1.9259299444e-34f) // %3
+ : "cc", "memory", "q1", "q2", "q3");
+}
+
+void ByteToFloatRow_NEON(const uint8_t* src,
+ float* dst,
+ float scale,
+ int width) {
+ asm volatile(
+
+ "1: \n"
+ "vld1.8 {d2}, [%0]! \n" // load 8 bytes
+ "subs %2, %2, #8 \n" // 8 pixels per loop
+ "vmovl.u8 q1, d2 \n" // 8 shorts
+ "vmovl.u16 q2, d2 \n" // 8 ints
+ "vmovl.u16 q3, d3 \n"
+ "vcvt.f32.u32 q2, q2 \n" // 8 floats
+ "vcvt.f32.u32 q3, q3 \n"
+ "vmul.f32 q2, q2, %y3 \n" // scale
+ "vmul.f32 q3, q3, %y3 \n"
+ "vst1.8 {q2, q3}, [%1]! \n" // store 8 floats
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "w"(scale) // %3
+ : "cc", "memory", "q1", "q2", "q3");
+}
+
+#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)..
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_neon64.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_neon64.cc
new file mode 100644
index 0000000000..24b4520bab
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/row_neon64.cc
@@ -0,0 +1,2884 @@
+/*
+ * Copyright 2014 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC Neon armv8 64 bit.
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+// Read 8 Y, 4 U and 4 V from 422
+#define READYUV422 \
+ "ld1 {v0.8b}, [%0], #8 \n" \
+ "ld1 {v1.s}[0], [%1], #4 \n" \
+ "ld1 {v1.s}[1], [%2], #4 \n"
+
+// Read 8 Y, 8 U and 8 V from 444
+#define READYUV444 \
+ "ld1 {v0.8b}, [%0], #8 \n" \
+ "ld1 {v1.d}[0], [%1], #8 \n" \
+ "ld1 {v1.d}[1], [%2], #8 \n" \
+ "uaddlp v1.8h, v1.16b \n" \
+ "rshrn v1.8b, v1.8h, #1 \n"
+
+// Read 8 Y, and set 4 U and 4 V to 128
+#define READYUV400 \
+ "ld1 {v0.8b}, [%0], #8 \n" \
+ "movi v1.8b , #128 \n"
+
+// Read 8 Y and 4 UV from NV12
+#define READNV12 \
+ "ld1 {v0.8b}, [%0], #8 \n" \
+ "ld1 {v2.8b}, [%1], #8 \n" \
+ "uzp1 v1.8b, v2.8b, v2.8b \n" \
+ "uzp2 v3.8b, v2.8b, v2.8b \n" \
+ "ins v1.s[1], v3.s[0] \n"
+
+// Read 8 Y and 4 VU from NV21
+#define READNV21 \
+ "ld1 {v0.8b}, [%0], #8 \n" \
+ "ld1 {v2.8b}, [%1], #8 \n" \
+ "uzp1 v3.8b, v2.8b, v2.8b \n" \
+ "uzp2 v1.8b, v2.8b, v2.8b \n" \
+ "ins v1.s[1], v3.s[0] \n"
+
+// Read 8 YUY2
+#define READYUY2 \
+ "ld2 {v0.8b, v1.8b}, [%0], #16 \n" \
+ "uzp2 v3.8b, v1.8b, v1.8b \n" \
+ "uzp1 v1.8b, v1.8b, v1.8b \n" \
+ "ins v1.s[1], v3.s[0] \n"
+
+// Read 8 UYVY
+#define READUYVY \
+ "ld2 {v2.8b, v3.8b}, [%0], #16 \n" \
+ "orr v0.8b, v3.8b, v3.8b \n" \
+ "uzp1 v1.8b, v2.8b, v2.8b \n" \
+ "uzp2 v3.8b, v2.8b, v2.8b \n" \
+ "ins v1.s[1], v3.s[0] \n"
+
+#define YUVTORGB_SETUP \
+ "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \
+ "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \
+ "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \
+ "ld1r {v31.4s}, [%[kYToRgb]] \n" \
+ "ld2 {v27.8h, v28.8h}, [%[kUVToRB]] \n" \
+ "ld2 {v29.8h, v30.8h}, [%[kUVToG]] \n"
+
+#define YUVTORGB(vR, vG, vB) \
+ "uxtl v0.8h, v0.8b \n" /* Extract Y */ \
+ "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \
+ "ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \
+ "ushll v0.4s, v0.4h, #0 \n" \
+ "mul v3.4s, v3.4s, v31.4s \n" \
+ "mul v0.4s, v0.4s, v31.4s \n" \
+ "sqshrun v0.4h, v0.4s, #16 \n" \
+ "sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \
+ "uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \
+ "mov v2.d[0], v1.d[1] \n" /* Extract V */ \
+ "uxtl v2.8h, v2.8b \n" \
+ "uxtl v1.8h, v1.8b \n" /* Extract U */ \
+ "mul v3.8h, v1.8h, v27.8h \n" \
+ "mul v5.8h, v1.8h, v29.8h \n" \
+ "mul v6.8h, v2.8h, v30.8h \n" \
+ "mul v7.8h, v2.8h, v28.8h \n" \
+ "sqadd v6.8h, v6.8h, v5.8h \n" \
+ "sqadd " #vB \
+ ".8h, v24.8h, v0.8h \n" /* B */ \
+ "sqadd " #vG \
+ ".8h, v25.8h, v0.8h \n" /* G */ \
+ "sqadd " #vR \
+ ".8h, v26.8h, v0.8h \n" /* R */ \
+ "sqadd " #vB ".8h, " #vB \
+ ".8h, v3.8h \n" /* B */ \
+ "sqsub " #vG ".8h, " #vG \
+ ".8h, v6.8h \n" /* G */ \
+ "sqadd " #vR ".8h, " #vR \
+ ".8h, v7.8h \n" /* R */ \
+ "sqshrun " #vB ".8b, " #vB \
+ ".8h, #6 \n" /* B */ \
+ "sqshrun " #vG ".8b, " #vG \
+ ".8h, #6 \n" /* G */ \
+ "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */
+
+void I444ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n" /* A */
+ "1: \n"
+ READYUV444
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_argb), // %3
+ "+r"(width) // %4
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void I422ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n" /* A */
+ "1: \n"
+ READYUV422
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_argb), // %3
+ "+r"(width) // %4
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "1: \n"
+ READYUV422
+ YUVTORGB(v22, v21, v20)
+ "ld1 {v23.8b}, [%3], #8 \n"
+ "subs %w5, %w5, #8 \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%4], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(src_a), // %3
+ "+r"(dst_argb), // %4
+ "+r"(width) // %5
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void I422ToRGBARow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "movi v20.8b, #255 \n" /* A */
+ "1: \n"
+ READYUV422
+ YUVTORGB(v23, v22, v21)
+ "subs %w4, %w4, #8 \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_rgba), // %3
+ "+r"(width) // %4
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void I422ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "1: \n"
+ READYUV422
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
+ "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_rgb24), // %3
+ "+r"(width) // %4
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+#define ARGBTORGB565 \
+ "shll v0.8h, v22.8b, #8 \n" /* R */ \
+ "shll v21.8h, v21.8b, #8 \n" /* G */ \
+ "shll v20.8h, v20.8b, #8 \n" /* B */ \
+ "sri v0.8h, v21.8h, #5 \n" /* RG */ \
+ "sri v0.8h, v20.8h, #11 \n" /* RGB */
+
+void I422ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READYUV422 YUVTORGB(
+ v22, v21,
+ v20) "subs %w4, %w4, #8 \n" ARGBTORGB565
+ "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels
+ // RGB565.
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_rgb565), // %3
+ "+r"(width) // %4
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30");
+}
+
+#define ARGBTOARGB1555 \
+ "shll v0.8h, v23.8b, #8 \n" /* A */ \
+ "shll v22.8h, v22.8b, #8 \n" /* R */ \
+ "shll v21.8h, v21.8b, #8 \n" /* G */ \
+ "shll v20.8h, v20.8b, #8 \n" /* B */ \
+ "sri v0.8h, v22.8h, #1 \n" /* AR */ \
+ "sri v0.8h, v21.8h, #6 \n" /* ARG */ \
+ "sri v0.8h, v20.8h, #11 \n" /* ARGB */
+
+void I422ToARGB1555Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
+ "1: \n" READYUV422 YUVTORGB(
+ v22, v21,
+ v20) "subs %w4, %w4, #8 \n" ARGBTOARGB1555
+ "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels
+ // RGB565.
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_argb1555), // %3
+ "+r"(width) // %4
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30");
+}
+
+#define ARGBTOARGB4444 \
+ /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \
+ "ushr v20.8b, v20.8b, #4 \n" /* B */ \
+ "bic v21.8b, v21.8b, v4.8b \n" /* G */ \
+ "ushr v22.8b, v22.8b, #4 \n" /* R */ \
+ "bic v23.8b, v23.8b, v4.8b \n" /* A */ \
+ "orr v0.8b, v20.8b, v21.8b \n" /* BG */ \
+ "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \
+ "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */
+
+void I422ToARGB4444Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "movi v4.16b, #0x0f \n" // bits to clear with vbic.
+ "1: \n"
+ READYUV422
+ YUVTORGB(v22, v21, v20)
+ "subs %w4, %w4, #8 \n"
+ "movi v23.8b, #255 \n"
+ ARGBTOARGB4444
+ "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444.
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_argb4444), // %3
+ "+r"(width) // %4
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
+ "1: \n"
+ READYUV400
+ YUVTORGB(v22, v21, v20)
+ "subs %w2, %w2, #8 \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
+ [kUVToG]"r"(&kYuvI601Constants.kUVToG),
+ [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvI601Constants.kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "movi v23.8b, #255 \n"
+ "1: \n"
+ "ld1 {v20.8b}, [%0], #8 \n"
+ "orr v21.8b, v20.8b, v20.8b \n"
+ "orr v22.8b, v20.8b, v20.8b \n"
+ "subs %w2, %w2, #8 \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v20", "v21", "v22", "v23");
+}
+
+void NV12ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
+ "1: \n"
+ READNV12
+ YUVTORGB(v22, v21, v20)
+ "subs %w3, %w3, #8 \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void NV21ToARGBRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
+ "1: \n"
+ READNV21
+ YUVTORGB(v22, v21, v20)
+ "subs %w3, %w3, #8 \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_vu), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void NV12ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "1: \n"
+ READNV12
+ YUVTORGB(v22, v21, v20)
+ "subs %w3, %w3, #8 \n"
+ "st3 {v20.8b,v21.8b,v22.8b}, [%2], #24 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_rgb24), // %2
+ "+r"(width) // %3
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void NV21ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "1: \n"
+ READNV21
+ YUVTORGB(v22, v21, v20)
+ "subs %w3, %w3, #8 \n"
+ "st3 {v20.8b,v21.8b,v22.8b}, [%2], #24 \n"
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_vu), // %1
+ "+r"(dst_rgb24), // %2
+ "+r"(width) // %3
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void NV12ToRGB565Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READNV12 YUVTORGB(
+ v22, v21,
+ v20) "subs %w3, %w3, #8 \n" ARGBTORGB565
+ "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels
+ // RGB565.
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_rgb565), // %2
+ "+r"(width) // %3
+ : [kUVToRB] "r"(&yuvconstants->kUVToRB),
+ [kUVToG] "r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb] "r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30");
+}
+
+void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
+ "1: \n"
+ READYUY2
+ YUVTORGB(v22, v21, v20)
+ "subs %w2, %w2, #8 \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP
+ "movi v23.8b, #255 \n"
+ "1: \n"
+ READUYVY
+ YUVTORGB(v22, v21, v20)
+ "subs %w2, %w2, #8 \n"
+ "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
+ );
+}
+
+// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
+void SplitUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pairs of UV
+ "subs %w3, %w3, #16 \n" // 16 processed per loop
+ "st1 {v0.16b}, [%1], #16 \n" // store U
+ "st1 {v1.16b}, [%2], #16 \n" // store V
+ "b.gt 1b \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3 // Output registers
+ : // Input registers
+ : "cc", "memory", "v0", "v1" // Clobber List
+ );
+}
+
+// Reads 16 U's and V's and writes out 16 pairs of UV.
+void MergeUVRow_NEON(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load U
+ "ld1 {v1.16b}, [%1], #16 \n" // load V
+ "subs %w3, %w3, #16 \n" // 16 processed per loop
+ "st2 {v0.16b,v1.16b}, [%2], #32 \n" // store 16 pairs of UV
+ "b.gt 1b \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3 // Output registers
+ : // Input registers
+ : "cc", "memory", "v0", "v1" // Clobber List
+ );
+}
+
+// Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b.
+void SplitRGBRow_NEON(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 RGB
+ "subs %w4, %w4, #16 \n" // 16 processed per loop
+ "st1 {v0.16b}, [%1], #16 \n" // store R
+ "st1 {v1.16b}, [%2], #16 \n" // store G
+ "st1 {v2.16b}, [%3], #16 \n" // store B
+ "b.gt 1b \n"
+ : "+r"(src_rgb), // %0
+ "+r"(dst_r), // %1
+ "+r"(dst_g), // %2
+ "+r"(dst_b), // %3
+ "+r"(width) // %4
+ : // Input registers
+ : "cc", "memory", "v0", "v1", "v2" // Clobber List
+ );
+}
+
+// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time
+void MergeRGBRow_NEON(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load R
+ "ld1 {v1.16b}, [%1], #16 \n" // load G
+ "ld1 {v2.16b}, [%2], #16 \n" // load B
+ "subs %w4, %w4, #16 \n" // 16 processed per loop
+ "st3 {v0.16b,v1.16b,v2.16b}, [%3], #48 \n" // store 16 RGB
+ "b.gt 1b \n"
+ : "+r"(src_r), // %0
+ "+r"(src_g), // %1
+ "+r"(src_b), // %2
+ "+r"(dst_rgb), // %3
+ "+r"(width) // %4
+ : // Input registers
+ : "cc", "memory", "v0", "v1", "v2" // Clobber List
+ );
+}
+
+// Copy multiple of 32.
+void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "1: \n"
+ "ldp q0, q1, [%0], #32 \n"
+ "subs %w2, %w2, #32 \n" // 32 processed per loop
+ "stp q0, q1, [%1], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2 // Output registers
+ : // Input registers
+ : "cc", "memory", "v0", "v1" // Clobber List
+ );
+}
+
+// SetRow writes 'width' bytes using an 8 bit value repeated.
+void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) {
+ asm volatile(
+ "dup v0.16b, %w2 \n" // duplicate 16 bytes
+ "1: \n"
+ "subs %w1, %w1, #16 \n" // 16 bytes per loop
+ "st1 {v0.16b}, [%0], #16 \n" // store
+ "b.gt 1b \n"
+ : "+r"(dst), // %0
+ "+r"(width) // %1
+ : "r"(v8) // %2
+ : "cc", "memory", "v0");
+}
+
+void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
+ asm volatile(
+ "dup v0.4s, %w2 \n" // duplicate 4 ints
+ "1: \n"
+ "subs %w1, %w1, #4 \n" // 4 ints per loop
+ "st1 {v0.16b}, [%0], #16 \n" // store
+ "b.gt 1b \n"
+ : "+r"(dst), // %0
+ "+r"(width) // %1
+ : "r"(v32) // %2
+ : "cc", "memory", "v0");
+}
+
+void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ // Start at end of source row.
+ "add %0, %0, %w2, sxtw \n"
+ "sub %0, %0, #16 \n"
+ "1: \n"
+ "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
+ "subs %w2, %w2, #16 \n" // 16 pixels per loop.
+ "rev64 v0.16b, v0.16b \n"
+ "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
+ "st1 {v0.D}[0], [%1], #8 \n"
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"((ptrdiff_t)-16) // %3
+ : "cc", "memory", "v0");
+}
+
+void MirrorUVRow_NEON(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ // Start at end of source row.
+ "add %0, %0, %w3, sxtw #1 \n"
+ "sub %0, %0, #16 \n"
+ "1: \n"
+ "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16
+ "subs %w3, %w3, #8 \n" // 8 pixels per loop.
+ "rev64 v0.8b, v0.8b \n"
+ "rev64 v1.8b, v1.8b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // dst += 8
+ "st1 {v1.8b}, [%2], #8 \n"
+ "b.gt 1b \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"((ptrdiff_t)-16) // %4
+ : "cc", "memory", "v0", "v1");
+}
+
+void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ // Start at end of source row.
+ "add %0, %0, %w2, sxtw #2 \n"
+ "sub %0, %0, #16 \n"
+ "1: \n"
+ "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
+ "subs %w2, %w2, #4 \n" // 4 pixels per loop.
+ "rev64 v0.4s, v0.4s \n"
+ "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
+ "st1 {v0.D}[0], [%1], #8 \n"
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"((ptrdiff_t)-16) // %3
+ : "cc", "memory", "v0");
+}
+
+void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "movi v4.8b, #255 \n" // Alpha
+ "1: \n"
+ "ld3 {v1.8b,v2.8b,v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "st4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%1], #32 \n" // store 8 ARGB
+ "b.gt 1b \n"
+ : "+r"(src_rgb24), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
+ );
+}
+
+void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "movi v5.8b, #255 \n" // Alpha
+ "1: \n"
+ "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "orr v3.8b, v1.8b, v1.8b \n" // move g
+ "orr v4.8b, v0.8b, v0.8b \n" // move r
+ "st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a
+ "b.gt 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List
+ );
+}
+
+void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
+ asm volatile(
+ "1: \n"
+ "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "orr v3.8b, v1.8b, v1.8b \n" // move g
+ "orr v4.8b, v0.8b, v0.8b \n" // move r
+ "st3 {v2.8b,v3.8b,v4.8b}, [%1], #24 \n" // store b g r
+ "b.gt 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_rgb24), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List
+ );
+}
+
+#define RGB565TOARGB \
+ "shrn v6.8b, v0.8h, #5 \n" /* G xxGGGGGG */ \
+ "shl v6.8b, v6.8b, #2 \n" /* G GGGGGG00 upper 6 */ \
+ "ushr v4.8b, v6.8b, #6 \n" /* G 000000GG lower 2 */ \
+ "orr v1.8b, v4.8b, v6.8b \n" /* G */ \
+ "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
+ "ushr v0.8h, v0.8h, #11 \n" /* R 000RRRRR */ \
+ "xtn2 v2.16b,v0.8h \n" /* R in upper part */ \
+ "shl v2.16b, v2.16b, #3 \n" /* R,B BBBBB000 upper 5 */ \
+ "ushr v0.16b, v2.16b, #5 \n" /* R,B 00000BBB lower 3 */ \
+ "orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \
+ "dup v2.2D, v0.D[1] \n" /* R */
+
+void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "movi v3.8b, #255 \n" // Alpha
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ RGB565TOARGB
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
+ "b.gt 1b \n"
+ : "+r"(src_rgb565), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List
+ );
+}
+
+#define ARGB1555TOARGB \
+ "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \
+ "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \
+ "xtn v3.8b, v2.8h \n" /* RRRRR000 AAAAAAAA */ \
+ \
+ "sshr v2.8h, v0.8h, #15 \n" /* A AAAAAAAA */ \
+ "xtn2 v3.16b, v2.8h \n" \
+ \
+ "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
+ "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \
+ \
+ "ushr v1.16b, v3.16b, #5 \n" /* R,A 00000RRR lower 3 */ \
+ "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \
+ "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \
+ \
+ "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \
+ "orr v2.16b, v1.16b, v3.16b \n" /* R,A */ \
+ "dup v1.2D, v0.D[1] \n" \
+ "dup v3.2D, v2.D[1] \n"
+
+// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
+#define RGB555TOARGB \
+ "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \
+ "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \
+ "xtn v3.8b, v2.8h \n" /* RRRRR000 */ \
+ \
+ "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
+ "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \
+ \
+ "ushr v1.16b, v3.16b, #5 \n" /* R 00000RRR lower 3 */ \
+ "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \
+ "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \
+ \
+ "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \
+ "orr v2.16b, v1.16b, v3.16b \n" /* R */ \
+ "dup v1.2D, v0.D[1] \n" /* G */
+
+void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "movi v3.8b, #255 \n" // Alpha
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ ARGB1555TOARGB
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
+ // pixels
+ "b.gt 1b \n"
+ : "+r"(src_argb1555), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+#define ARGB4444TOARGB \
+ "shrn v1.8b, v0.8h, #8 \n" /* v1(l) AR */ \
+ "xtn2 v1.16b, v0.8h \n" /* v1(h) GB */ \
+ "shl v2.16b, v1.16b, #4 \n" /* B,R BBBB0000 */ \
+ "ushr v3.16b, v1.16b, #4 \n" /* G,A 0000GGGG */ \
+ "ushr v0.16b, v2.16b, #4 \n" /* B,R 0000BBBB */ \
+ "shl v1.16b, v3.16b, #4 \n" /* G,A GGGG0000 */ \
+ "orr v2.16b, v0.16b, v2.16b \n" /* B,R BBBBBBBB */ \
+ "orr v3.16b, v1.16b, v3.16b \n" /* G,A GGGGGGGG */ \
+ "dup v0.2D, v2.D[1] \n" \
+ "dup v1.2D, v3.D[1] \n"
+
+void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ ARGB4444TOARGB
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
+ // pixels
+ "b.gt 1b \n"
+ : "+r"(src_argb4444), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List
+ );
+}
+
+void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb24,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "st3 {v1.8b,v2.8b,v3.8b}, [%1], #24 \n" // store 8 pixels of
+ // RGB24.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_rgb24), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
+ );
+}
+
+void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
+ asm volatile(
+ "1: \n"
+ "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "orr v4.8b, v2.8b, v2.8b \n" // mov g
+ "orr v5.8b, v1.8b, v1.8b \n" // mov b
+ "st3 {v3.8b,v4.8b,v5.8b}, [%1], #24 \n" // store r g b
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_raw), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List
+ );
+}
+
+void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2.
+ "subs %w2, %w2, #16 \n" // 16 processed per loop.
+ "st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y.
+ "b.gt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1" // Clobber List
+ );
+}
+
+void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY.
+ "subs %w2, %w2, #16 \n" // 16 processed per loop.
+ "st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y.
+ "b.gt 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1" // Clobber List
+ );
+}
+
+void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 YUY2
+ "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs.
+ "st1 {v1.8b}, [%1], #8 \n" // store 8 U.
+ "st1 {v3.8b}, [%2], #8 \n" // store 8 V.
+ "b.gt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 UYVY
+ "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs.
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 U.
+ "st1 {v2.8b}, [%2], #8 \n" // store 8 V.
+ "b.gt 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_yuy2b = src_yuy2 + stride_yuy2;
+ asm volatile(
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels
+ "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs.
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row
+ "urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U
+ "urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V
+ "st1 {v1.8b}, [%2], #8 \n" // store 8 U.
+ "st1 {v3.8b}, [%3], #8 \n" // store 8 V.
+ "b.gt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(src_yuy2b), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
+ "v7" // Clobber List
+ );
+}
+
+void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
+ int stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_uyvyb = src_uyvy + stride_uyvy;
+ asm volatile(
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels
+ "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs.
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row
+ "urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U
+ "urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 U.
+ "st1 {v2.8b}, [%3], #8 \n" // store 8 V.
+ "b.gt 1b \n"
+ : "+r"(src_uyvy), // %0
+ "+r"(src_uyvyb), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
+ "v7" // Clobber List
+ );
+}
+
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+void ARGBShuffleRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width) {
+ asm volatile(
+ "ld1 {v2.16b}, [%3] \n" // shuffler
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 4 pixels.
+ "subs %w2, %w2, #4 \n" // 4 processed per loop
+ "tbl v1.16b, {v0.16b}, v2.16b \n" // look up 4 pixels
+ "st1 {v1.16b}, [%1], #16 \n" // store 4.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(shuffler) // %3
+ : "cc", "memory", "v0", "v1", "v2" // Clobber List
+ );
+}
+
+void I422ToYUY2Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys
+ "orr v2.8b, v1.8b, v1.8b \n"
+ "ld1 {v1.8b}, [%1], #8 \n" // load 8 Us
+ "ld1 {v3.8b}, [%2], #8 \n" // load 8 Vs
+ "subs %w4, %w4, #16 \n" // 16 pixels
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels.
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_yuy2), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3");
+}
+
+void I422ToUYVYRow_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld2 {v1.8b,v2.8b}, [%0], #16 \n" // load 16 Ys
+ "orr v3.8b, v2.8b, v2.8b \n"
+ "ld1 {v0.8b}, [%1], #8 \n" // load 8 Us
+ "ld1 {v2.8b}, [%2], #8 \n" // load 8 Vs
+ "subs %w4, %w4, #16 \n" // 16 pixels
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels.
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_u), // %1
+ "+r"(src_v), // %2
+ "+r"(dst_uyvy), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3");
+}
+
+void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb565,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ ARGBTORGB565
+ "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_rgb565), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v20", "v21", "v22", "v23");
+}
+
+void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
+ int width) {
+ asm volatile(
+ "dup v1.4s, %w2 \n" // dither4
+ "1: \n"
+ "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" // load 8 pixels
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "uqadd v20.8b, v20.8b, v1.8b \n"
+ "uqadd v21.8b, v21.8b, v1.8b \n"
+ "uqadd v22.8b, v22.8b, v1.8b \n" ARGBTORGB565
+ "st1 {v0.16b}, [%0], #16 \n" // store 8 pixels RGB565.
+ "b.gt 1b \n"
+ : "+r"(dst_rgb) // %0
+ : "r"(src_argb), // %1
+ "r"(dither4), // %2
+ "r"(width) // %3
+ : "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23");
+}
+
+void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb1555,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ ARGBTOARGB1555
+ "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels
+ // ARGB1555.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb1555), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v20", "v21", "v22", "v23");
+}
+
+void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb4444,
+ int width) {
+ asm volatile(
+ "movi v4.16b, #0x0f \n" // bits to clear with
+ // vbic.
+ "1: \n"
+ "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ ARGBTOARGB4444
+ "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels
+ // ARGB4444.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb4444), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23");
+}
+
+void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movi v4.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v5.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v6.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v7.8b, #16 \n" // Add 16 constant
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "umull v3.8h, v0.8b, v4.8b \n" // B
+ "umlal v3.8h, v1.8b, v5.8b \n" // G
+ "umlal v3.8h, v2.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v7.8b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
+}
+
+void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16
+ // pixels
+ "subs %w2, %w2, #16 \n" // 16 processed per loop
+ "st1 {v3.16b}, [%1], #16 \n" // store 16 A's.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_a), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movi v4.8b, #15 \n" // B * 0.11400 coefficient
+ "movi v5.8b, #75 \n" // G * 0.58700 coefficient
+ "movi v6.8b, #38 \n" // R * 0.29900 coefficient
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "umull v3.8h, v0.8b, v4.8b \n" // B
+ "umlal v3.8h, v1.8b, v5.8b \n" // G
+ "umlal v3.8h, v2.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
+}
+
+// 8x1 pixels.
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "movi v24.8b, #112 \n" // UB / VR 0.875
+ // coefficient
+ "movi v25.8b, #74 \n" // UG -0.5781 coefficient
+ "movi v26.8b, #38 \n" // UR -0.2969 coefficient
+ "movi v27.8b, #18 \n" // VB -0.1406 coefficient
+ "movi v28.8b, #94 \n" // VG -0.7344 coefficient
+ "movi v29.16b,#0x80 \n" // 128.5
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
+ // pixels.
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "umull v4.8h, v0.8b, v24.8b \n" // B
+ "umlsl v4.8h, v1.8b, v25.8b \n" // G
+ "umlsl v4.8h, v2.8b, v26.8b \n" // R
+ "add v4.8h, v4.8h, v29.8h \n" // +128 -> unsigned
+
+ "umull v3.8h, v2.8b, v24.8b \n" // R
+ "umlsl v3.8h, v1.8b, v28.8b \n" // G
+ "umlsl v3.8h, v0.8b, v27.8b \n" // B
+ "add v3.8h, v3.8h, v29.8h \n" // +128 -> unsigned
+
+ "uqshrn v0.8b, v4.8h, #8 \n" // 16 bit to 8 bit U
+ "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V
+
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26",
+ "v27", "v28", "v29");
+}
+
+#define RGBTOUV_SETUP_REG \
+ "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
+ "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
+ "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
+ "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
+ "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
+ "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
+
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
+// clang-format off
+#define RGBTOUV(QB, QG, QR) \
+ "mul v3.8h, " #QB ",v20.8h \n" /* B */ \
+ "mul v4.8h, " #QR ",v20.8h \n" /* R */ \
+ "mls v3.8h, " #QG ",v21.8h \n" /* G */ \
+ "mls v4.8h, " #QG ",v24.8h \n" /* G */ \
+ "mls v3.8h, " #QR ",v22.8h \n" /* R */ \
+ "mls v4.8h, " #QB ",v23.8h \n" /* B */ \
+ "add v3.8h, v3.8h, v25.8h \n" /* +128 -> unsigned */ \
+ "add v4.8h, v4.8h, v25.8h \n" /* +128 -> unsigned */ \
+ "uqshrn v0.8b, v3.8h, #8 \n" /* 16 bit to 8 bit U */ \
+ "uqshrn v1.8b, v4.8h, #8 \n" /* 16 bit to 8 bit V */
+// clang-format on
+
+// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
+// TODO(fbarchard): consider ptrdiff_t for all strides.
+
+void ARGBToUVRow_NEON(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_argb_1 = src_argb + src_stride_argb;
+ asm volatile (
+ RGBTOUV_SETUP_REG
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
+
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
+ "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
+
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v1.8h, v2.8h)
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_argb_1), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
+ );
+}
+
+// TODO(fbarchard): Subsample match C code.
+void ARGBToUVJRow_NEON(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_argb_1 = src_argb + src_stride_argb;
+ asm volatile (
+ "movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
+ "movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
+ "movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2
+ "movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2
+ "movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2
+ "movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
+ "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
+
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v1.8h, v2.8h)
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_argb_1), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
+ );
+}
+
+void BGRAToUVRow_NEON(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_bgra_1 = src_bgra + src_stride_bgra;
+ asm volatile (
+ RGBTOUV_SETUP_REG
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v0.8h, v3.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v3.8h, v2.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v1.16b \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more
+ "uadalp v0.8h, v7.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v3.8h, v6.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v5.16b \n" // R 16 bytes -> 8 shorts.
+
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v3.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v1.8h, v2.8h)
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_bgra), // %0
+ "+r"(src_bgra_1), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
+ );
+}
+
+void ABGRToUVRow_NEON(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr;
+ asm volatile (
+ RGBTOUV_SETUP_REG
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v3.8h, v2.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v0.16b \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more.
+ "uadalp v3.8h, v6.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v4.16b \n" // R 16 bytes -> 8 shorts.
+
+ "urshr v0.8h, v3.8h, #1 \n" // 2x average
+ "urshr v2.8h, v2.8h, #1 \n"
+ "urshr v1.8h, v1.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v2.8h, v1.8h)
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(src_abgr_1), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
+ );
+}
+
+void RGBAToUVRow_NEON(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_rgba_1 = src_rgba + src_stride_rgba;
+ asm volatile (
+ RGBTOUV_SETUP_REG
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v0.8h, v1.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v2.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v3.16b \n" // R 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more.
+ "uadalp v0.8h, v5.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v6.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v7.16b \n" // R 16 bytes -> 8 shorts.
+
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v1.8h, v2.8h)
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_rgba), // %0
+ "+r"(src_rgba_1), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
+ );
+}
+
+void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_rgb24_1 = src_rgb24 + src_stride_rgb24;
+ asm volatile (
+ RGBTOUV_SETUP_REG
+ "1: \n"
+ "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels.
+ "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
+ "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 16 more.
+ "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
+
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v0.8h, v1.8h, v2.8h)
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_rgb24), // %0
+ "+r"(src_rgb24_1), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
+ );
+}
+
+void RAWToUVRow_NEON(const uint8_t* src_raw,
+ int src_stride_raw,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_raw_1 = src_raw + src_stride_raw;
+ asm volatile (
+ RGBTOUV_SETUP_REG
+ "1: \n"
+ "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 8 RAW pixels.
+ "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts.
+ "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 8 more RAW pixels
+ "uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts.
+
+ "urshr v2.8h, v2.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v0.8h, v0.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 32 processed per loop.
+ RGBTOUV(v2.8h, v1.8h, v0.8h)
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(src_raw_1), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
+ );
+}
+
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
+void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
+ int src_stride_rgb565,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_rgb565_1 = src_rgb565 + src_stride_rgb565;
+ asm volatile(
+ "movi v22.8h, #56, lsl #0 \n" // UB / VR coeff (0.875) /
+ // 2
+ "movi v23.8h, #37, lsl #0 \n" // UG coeff (-0.5781) / 2
+ "movi v24.8h, #19, lsl #0 \n" // UR coeff (-0.2969) / 2
+ "movi v25.8h, #9 , lsl #0 \n" // VB coeff (-0.1406) / 2
+ "movi v26.8h, #47, lsl #0 \n" // VG coeff (-0.7344) / 2
+ "movi v27.16b, #0x80 \n" // 128.5 0x8080 in 16bit
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
+ RGB565TOARGB
+ "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+ "ld1 {v0.16b}, [%0], #16 \n" // next 8 RGB565 pixels.
+ RGB565TOARGB
+ "uaddlp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+
+ "ld1 {v0.16b}, [%1], #16 \n" // load 8 RGB565 pixels.
+ RGB565TOARGB
+ "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+ "ld1 {v0.16b}, [%1], #16 \n" // next 8 RGB565 pixels.
+ RGB565TOARGB
+ "uadalp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+
+ "ins v16.D[1], v17.D[0] \n"
+ "ins v18.D[1], v19.D[0] \n"
+ "ins v20.D[1], v21.D[0] \n"
+
+ "urshr v4.8h, v16.8h, #1 \n" // 2x average
+ "urshr v5.8h, v18.8h, #1 \n"
+ "urshr v6.8h, v20.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 16 processed per loop.
+ "mul v16.8h, v4.8h, v22.8h \n" // B
+ "mls v16.8h, v5.8h, v23.8h \n" // G
+ "mls v16.8h, v6.8h, v24.8h \n" // R
+ "add v16.8h, v16.8h, v27.8h \n" // +128 -> unsigned
+ "mul v17.8h, v6.8h, v22.8h \n" // R
+ "mls v17.8h, v5.8h, v26.8h \n" // G
+ "mls v17.8h, v4.8h, v25.8h \n" // B
+ "add v17.8h, v17.8h, v27.8h \n" // +128 -> unsigned
+ "uqshrn v0.8b, v16.8h, #8 \n" // 16 bit to 8 bit U
+ "uqshrn v1.8b, v17.8h, #8 \n" // 16 bit to 8 bit V
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_rgb565), // %0
+ "+r"(src_rgb565_1), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
+ "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
+ "v27");
+}
+
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
+void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,
+ int src_stride_argb1555,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_argb1555_1 = src_argb1555 + src_stride_argb1555;
+ asm volatile(
+ RGBTOUV_SETUP_REG
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels.
+ RGB555TOARGB
+ "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+ "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB1555 pixels.
+ RGB555TOARGB
+ "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+
+ "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB1555 pixels.
+ RGB555TOARGB
+ "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+ "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB1555 pixels.
+ RGB555TOARGB
+ "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+
+ "ins v16.D[1], v26.D[0] \n"
+ "ins v17.D[1], v27.D[0] \n"
+ "ins v18.D[1], v28.D[0] \n"
+
+ "urshr v4.8h, v16.8h, #1 \n" // 2x average
+ "urshr v5.8h, v17.8h, #1 \n"
+ "urshr v6.8h, v18.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 16 processed per loop.
+ "mul v2.8h, v4.8h, v20.8h \n" // B
+ "mls v2.8h, v5.8h, v21.8h \n" // G
+ "mls v2.8h, v6.8h, v22.8h \n" // R
+ "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned
+ "mul v3.8h, v6.8h, v20.8h \n" // R
+ "mls v3.8h, v5.8h, v24.8h \n" // G
+ "mls v3.8h, v4.8h, v23.8h \n" // B
+ "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
+ "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U
+ "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_argb1555), // %0
+ "+r"(src_argb1555_1), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27",
+ "v28");
+}
+
+// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
+void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
+ int src_stride_argb4444,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_argb4444_1 = src_argb4444 + src_stride_argb4444;
+ asm volatile(
+ RGBTOUV_SETUP_REG
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
+ ARGB4444TOARGB
+ "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+ "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB4444 pixels.
+ ARGB4444TOARGB
+ "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+
+ "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB4444 pixels.
+ ARGB4444TOARGB
+ "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+ "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB4444 pixels.
+ ARGB4444TOARGB
+ "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
+ "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
+ "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
+
+ "ins v16.D[1], v26.D[0] \n"
+ "ins v17.D[1], v27.D[0] \n"
+ "ins v18.D[1], v28.D[0] \n"
+
+ "urshr v4.8h, v16.8h, #1 \n" // 2x average
+ "urshr v5.8h, v17.8h, #1 \n"
+ "urshr v6.8h, v18.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 16 processed per loop.
+ "mul v2.8h, v4.8h, v20.8h \n" // B
+ "mls v2.8h, v5.8h, v21.8h \n" // G
+ "mls v2.8h, v6.8h, v22.8h \n" // R
+ "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned
+ "mul v3.8h, v6.8h, v20.8h \n" // R
+ "mls v3.8h, v5.8h, v24.8h \n" // G
+ "mls v3.8h, v4.8h, v23.8h \n" // B
+ "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
+ "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U
+ "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_argb4444), // %0
+ "+r"(src_argb4444_1), // %1
+ "+r"(dst_u), // %2
+ "+r"(dst_v), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27",
+ "v28"
+
+ );
+}
+
+void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movi v24.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v25.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v26.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v27.8b, #16 \n" // Add 16 constant
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ RGB565TOARGB
+ "umull v3.8h, v0.8b, v24.8b \n" // B
+ "umlal v3.8h, v1.8b, v25.8b \n" // G
+ "umlal v3.8h, v2.8b, v26.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v27.8b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
+ : "+r"(src_rgb565), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6", "v24", "v25", "v26",
+ "v27");
+}
+
+void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
+ uint8_t* dst_y,
+ int width) {
+ asm volatile(
+ "movi v4.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v5.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v6.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v7.8b, #16 \n" // Add 16 constant
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ ARGB1555TOARGB
+ "umull v3.8h, v0.8b, v4.8b \n" // B
+ "umlal v3.8h, v1.8b, v5.8b \n" // G
+ "umlal v3.8h, v2.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v7.8b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
+ : "+r"(src_argb1555), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
+}
+
+void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
+ uint8_t* dst_y,
+ int width) {
+ asm volatile(
+ "movi v24.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v25.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v26.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v27.8b, #16 \n" // Add 16 constant
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ ARGB4444TOARGB
+ "umull v3.8h, v0.8b, v24.8b \n" // B
+ "umlal v3.8h, v1.8b, v25.8b \n" // G
+ "umlal v3.8h, v2.8b, v26.8b \n" // R
+ "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v27.8b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
+ : "+r"(src_argb4444), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27");
+}
+
+void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movi v4.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v5.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v6.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v7.8b, #16 \n" // Add 16 constant
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "umull v16.8h, v1.8b, v4.8b \n" // R
+ "umlal v16.8h, v2.8b, v5.8b \n" // G
+ "umlal v16.8h, v3.8b, v6.8b \n" // B
+ "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v7.8b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
+ : "+r"(src_bgra), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
+}
+
+void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movi v4.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v5.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v6.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v7.8b, #16 \n" // Add 16 constant
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "umull v16.8h, v0.8b, v4.8b \n" // R
+ "umlal v16.8h, v1.8b, v5.8b \n" // G
+ "umlal v16.8h, v2.8b, v6.8b \n" // B
+ "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v7.8b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
+}
+
+void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movi v4.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v5.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v6.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v7.8b, #16 \n" // Add 16 constant
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "umull v16.8h, v1.8b, v4.8b \n" // B
+ "umlal v16.8h, v2.8b, v5.8b \n" // G
+ "umlal v16.8h, v3.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v7.8b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
+ : "+r"(src_rgba), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
+}
+
+void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movi v4.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v5.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v6.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v7.8b, #16 \n" // Add 16 constant
+ "1: \n"
+ "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "umull v16.8h, v0.8b, v4.8b \n" // B
+ "umlal v16.8h, v1.8b, v5.8b \n" // G
+ "umlal v16.8h, v2.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v7.8b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
+ : "+r"(src_rgb24), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
+}
+
+void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movi v4.8b, #33 \n" // R * 0.2578 coefficient
+ "movi v5.8b, #65 \n" // G * 0.5078 coefficient
+ "movi v6.8b, #13 \n" // B * 0.1016 coefficient
+ "movi v7.8b, #16 \n" // Add 16 constant
+ "1: \n"
+ "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "umull v16.8h, v0.8b, v4.8b \n" // B
+ "umlal v16.8h, v1.8b, v5.8b \n" // G
+ "umlal v16.8h, v2.8b, v6.8b \n" // R
+ "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y
+ "uqadd v0.8b, v0.8b, v7.8b \n"
+ "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
+ "b.gt 1b \n"
+ : "+r"(src_raw), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
+}
+
+// Bilinear filter 16x2 -> 16x1
+void InterpolateRow_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ int y1_fraction = source_y_fraction;
+ int y0_fraction = 256 - y1_fraction;
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
+ asm volatile(
+ "cmp %w4, #0 \n"
+ "b.eq 100f \n"
+ "cmp %w4, #128 \n"
+ "b.eq 50f \n"
+
+ "dup v5.16b, %w4 \n"
+ "dup v4.16b, %w5 \n"
+ // General purpose row blend.
+ "1: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "ld1 {v1.16b}, [%2], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "umull v2.8h, v0.8b, v4.8b \n"
+ "umull2 v3.8h, v0.16b, v4.16b \n"
+ "umlal v2.8h, v1.8b, v5.8b \n"
+ "umlal2 v3.8h, v1.16b, v5.16b \n"
+ "rshrn v0.8b, v2.8h, #8 \n"
+ "rshrn2 v0.16b, v3.8h, #8 \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 1b \n"
+ "b 99f \n"
+
+ // Blend 50 / 50.
+ "50: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "ld1 {v1.16b}, [%2], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 50b \n"
+ "b 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ "100: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 100b \n"
+
+ "99: \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(src_ptr1), // %2
+ "+r"(dst_width), // %3
+ "+r"(y1_fraction), // %4
+ "+r"(y0_fraction) // %5
+ :
+ : "cc", "memory", "v0", "v1", "v3", "v4", "v5");
+}
+
+// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
+void ARGBBlendRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "subs %w3, %w3, #8 \n"
+ "b.lt 89f \n"
+ // Blend 8 pixels.
+ "8: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB0
+ // pixels
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 ARGB1
+ // pixels
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "umull v16.8h, v4.8b, v3.8b \n" // db * a
+ "umull v17.8h, v5.8b, v3.8b \n" // dg * a
+ "umull v18.8h, v6.8b, v3.8b \n" // dr * a
+ "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8
+ "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8
+ "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8
+ "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256)
+ "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256)
+ "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256)
+ "uqadd v0.8b, v0.8b, v4.8b \n" // + sb
+ "uqadd v1.8b, v1.8b, v5.8b \n" // + sg
+ "uqadd v2.8b, v2.8b, v6.8b \n" // + sr
+ "movi v3.8b, #255 \n" // a = 255
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
+ // pixels
+ "b.ge 8b \n"
+
+ "89: \n"
+ "adds %w3, %w3, #8-1 \n"
+ "b.lt 99f \n"
+
+ // Blend 1 pixels.
+ "1: \n"
+ "ld4 {v0.b,v1.b,v2.b,v3.b}[0], [%0], #4 \n" // load 1 pixel ARGB0.
+ "ld4 {v4.b,v5.b,v6.b,v7.b}[0], [%1], #4 \n" // load 1 pixel ARGB1.
+ "subs %w3, %w3, #1 \n" // 1 processed per loop.
+ "umull v16.8h, v4.8b, v3.8b \n" // db * a
+ "umull v17.8h, v5.8b, v3.8b \n" // dg * a
+ "umull v18.8h, v6.8b, v3.8b \n" // dr * a
+ "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8
+ "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8
+ "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8
+ "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256)
+ "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256)
+ "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256)
+ "uqadd v0.8b, v0.8b, v4.8b \n" // + sb
+ "uqadd v1.8b, v1.8b, v5.8b \n" // + sg
+ "uqadd v2.8b, v2.8b, v6.8b \n" // + sr
+ "movi v3.8b, #255 \n" // a = 255
+ "st4 {v0.b,v1.b,v2.b,v3.b}[0], [%2], #4 \n" // store 1 pixel.
+ "b.ge 1b \n"
+
+ "99: \n"
+
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
+ "v17", "v18");
+}
+
+// Attenuate 8 pixels at a time.
+void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // Attenuate 8 pixels.
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "umull v4.8h, v0.8b, v3.8b \n" // b * a
+ "umull v5.8h, v1.8b, v3.8b \n" // g * a
+ "umull v6.8h, v2.8b, v3.8b \n" // r * a
+ "uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8
+ "uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8
+ "uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
+ // pixels
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
+}
+
+// Quantize 8 ARGB pixels (32 bytes).
+// dst = (dst * scale >> 16) * interval_size + interval_offset;
+void ARGBQuantizeRow_NEON(uint8_t* dst_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int width) {
+ asm volatile(
+ "dup v4.8h, %w2 \n"
+ "ushr v4.8h, v4.8h, #1 \n" // scale >>= 1
+ "dup v5.8h, %w3 \n" // interval multiply.
+ "dup v6.8h, %w4 \n" // interval add
+
+ // 8 pixel loop.
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB.
+ "subs %w1, %w1, #8 \n" // 8 processed per loop.
+ "uxtl v0.8h, v0.8b \n" // b (0 .. 255)
+ "uxtl v1.8h, v1.8b \n"
+ "uxtl v2.8h, v2.8b \n"
+ "sqdmulh v0.8h, v0.8h, v4.8h \n" // b * scale
+ "sqdmulh v1.8h, v1.8h, v4.8h \n" // g
+ "sqdmulh v2.8h, v2.8h, v4.8h \n" // r
+ "mul v0.8h, v0.8h, v5.8h \n" // b * interval_size
+ "mul v1.8h, v1.8h, v5.8h \n" // g
+ "mul v2.8h, v2.8h, v5.8h \n" // r
+ "add v0.8h, v0.8h, v6.8h \n" // b + interval_offset
+ "add v1.8h, v1.8h, v6.8h \n" // g
+ "add v2.8h, v2.8h, v6.8h \n" // r
+ "uqxtn v0.8b, v0.8h \n"
+ "uqxtn v1.8b, v1.8h \n"
+ "uqxtn v2.8b, v2.8h \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 ARGB
+ "b.gt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
+ : "r"(scale), // %2
+ "r"(interval_size), // %3
+ "r"(interval_offset) // %4
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
+}
+
+// Shade 8 pixels at a time by specified value.
+// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8.
+// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
+void ARGBShadeRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value) {
+ asm volatile(
+ "dup v0.4s, %w3 \n" // duplicate scale value.
+ "zip1 v0.8b, v0.8b, v0.8b \n" // v0.8b aarrggbb.
+ "ushr v0.8h, v0.8h, #1 \n" // scale / 2.
+
+ // 8 pixel loop.
+ "1: \n"
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%0], #32 \n" // load 8 ARGB
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "uxtl v4.8h, v4.8b \n" // b (0 .. 255)
+ "uxtl v5.8h, v5.8b \n"
+ "uxtl v6.8h, v6.8b \n"
+ "uxtl v7.8h, v7.8b \n"
+ "sqrdmulh v4.8h, v4.8h, v0.h[0] \n" // b * scale * 2
+ "sqrdmulh v5.8h, v5.8h, v0.h[1] \n" // g
+ "sqrdmulh v6.8h, v6.8h, v0.h[2] \n" // r
+ "sqrdmulh v7.8h, v7.8h, v0.h[3] \n" // a
+ "uqxtn v4.8b, v4.8h \n"
+ "uqxtn v5.8b, v5.8h \n"
+ "uqxtn v6.8b, v6.8h \n"
+ "uqxtn v7.8b, v7.8h \n"
+ "st4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // store 8 ARGB
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(value) // %3
+ : "cc", "memory", "v0", "v4", "v5", "v6", "v7");
+}
+
+// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
+// Similar to ARGBToYJ but stores ARGB.
+// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
+void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
+ asm volatile(
+ "movi v24.8b, #15 \n" // B * 0.11400 coefficient
+ "movi v25.8b, #75 \n" // G * 0.58700 coefficient
+ "movi v26.8b, #38 \n" // R * 0.29900 coefficient
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "umull v4.8h, v0.8b, v24.8b \n" // B
+ "umlal v4.8h, v1.8b, v25.8b \n" // G
+ "umlal v4.8h, v2.8b, v26.8b \n" // R
+ "sqrshrun v0.8b, v4.8h, #7 \n" // 15 bit to 8 bit B
+ "orr v1.8b, v0.8b, v0.8b \n" // G
+ "orr v2.8b, v0.8b, v0.8b \n" // R
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 pixels.
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26");
+}
+
+// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
+// b = (r * 35 + g * 68 + b * 17) >> 7
+// g = (r * 45 + g * 88 + b * 22) >> 7
+// r = (r * 50 + g * 98 + b * 24) >> 7
+
+void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) {
+ asm volatile(
+ "movi v20.8b, #17 \n" // BB coefficient
+ "movi v21.8b, #68 \n" // BG coefficient
+ "movi v22.8b, #35 \n" // BR coefficient
+ "movi v24.8b, #22 \n" // GB coefficient
+ "movi v25.8b, #88 \n" // GG coefficient
+ "movi v26.8b, #45 \n" // GR coefficient
+ "movi v28.8b, #24 \n" // BB coefficient
+ "movi v29.8b, #98 \n" // BG coefficient
+ "movi v30.8b, #50 \n" // BR coefficient
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels.
+ "subs %w1, %w1, #8 \n" // 8 processed per loop.
+ "umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B
+ "umlal v4.8h, v1.8b, v21.8b \n" // G
+ "umlal v4.8h, v2.8b, v22.8b \n" // R
+ "umull v5.8h, v0.8b, v24.8b \n" // B to Sepia G
+ "umlal v5.8h, v1.8b, v25.8b \n" // G
+ "umlal v5.8h, v2.8b, v26.8b \n" // R
+ "umull v6.8h, v0.8b, v28.8b \n" // B to Sepia R
+ "umlal v6.8h, v1.8b, v29.8b \n" // G
+ "umlal v6.8h, v2.8b, v30.8b \n" // R
+ "uqshrn v0.8b, v4.8h, #7 \n" // 16 bit to 8 bit B
+ "uqshrn v1.8b, v5.8h, #7 \n" // 16 bit to 8 bit G
+ "uqshrn v2.8b, v6.8h, #7 \n" // 16 bit to 8 bit R
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 pixels.
+ "b.gt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
+ "v21", "v22", "v24", "v25", "v26", "v28", "v29", "v30");
+}
+
+// Tranform 8 ARGB pixels (32 bytes) with color matrix.
+// TODO(fbarchard): Was same as Sepia except matrix is provided. This function
+// needs to saturate. Consider doing a non-saturating version.
+void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
+ int width) {
+ asm volatile(
+ "ld1 {v2.16b}, [%3] \n" // load 3 ARGB vectors.
+ "sxtl v0.8h, v2.8b \n" // B,G coefficients s16.
+ "sxtl2 v1.8h, v2.16b \n" // R,A coefficients s16.
+
+ "1: \n"
+ "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 ARGB
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "uxtl v16.8h, v16.8b \n" // b (0 .. 255) 16 bit
+ "uxtl v17.8h, v17.8b \n" // g
+ "uxtl v18.8h, v18.8b \n" // r
+ "uxtl v19.8h, v19.8b \n" // a
+ "mul v22.8h, v16.8h, v0.h[0] \n" // B = B * Matrix B
+ "mul v23.8h, v16.8h, v0.h[4] \n" // G = B * Matrix G
+ "mul v24.8h, v16.8h, v1.h[0] \n" // R = B * Matrix R
+ "mul v25.8h, v16.8h, v1.h[4] \n" // A = B * Matrix A
+ "mul v4.8h, v17.8h, v0.h[1] \n" // B += G * Matrix B
+ "mul v5.8h, v17.8h, v0.h[5] \n" // G += G * Matrix G
+ "mul v6.8h, v17.8h, v1.h[1] \n" // R += G * Matrix R
+ "mul v7.8h, v17.8h, v1.h[5] \n" // A += G * Matrix A
+ "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B
+ "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G
+ "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R
+ "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A
+ "mul v4.8h, v18.8h, v0.h[2] \n" // B += R * Matrix B
+ "mul v5.8h, v18.8h, v0.h[6] \n" // G += R * Matrix G
+ "mul v6.8h, v18.8h, v1.h[2] \n" // R += R * Matrix R
+ "mul v7.8h, v18.8h, v1.h[6] \n" // A += R * Matrix A
+ "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B
+ "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G
+ "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R
+ "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A
+ "mul v4.8h, v19.8h, v0.h[3] \n" // B += A * Matrix B
+ "mul v5.8h, v19.8h, v0.h[7] \n" // G += A * Matrix G
+ "mul v6.8h, v19.8h, v1.h[3] \n" // R += A * Matrix R
+ "mul v7.8h, v19.8h, v1.h[7] \n" // A += A * Matrix A
+ "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B
+ "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G
+ "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R
+ "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A
+ "sqshrun v16.8b, v22.8h, #6 \n" // 16 bit to 8 bit B
+ "sqshrun v17.8b, v23.8h, #6 \n" // 16 bit to 8 bit G
+ "sqshrun v18.8b, v24.8h, #6 \n" // 16 bit to 8 bit R
+ "sqshrun v19.8b, v25.8h, #6 \n" // 16 bit to 8 bit A
+ "st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // store 8 ARGB
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(matrix_argb) // %3
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
+ "v17", "v18", "v19", "v22", "v23", "v24", "v25");
+}
+
+// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
+// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
+void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // 8 pixel loop.
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "umull v0.8h, v0.8b, v4.8b \n" // multiply B
+ "umull v1.8h, v1.8b, v5.8b \n" // multiply G
+ "umull v2.8h, v2.8b, v6.8b \n" // multiply R
+ "umull v3.8h, v3.8b, v7.8b \n" // multiply A
+ "rshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit B
+ "rshrn v1.8b, v1.8h, #8 \n" // 16 bit to 8 bit G
+ "rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R
+ "rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
+ "b.gt 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
+}
+
+// Add 2 rows of ARGB pixels together, 8 pixels at a time.
+void ARGBAddRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // 8 pixel loop.
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "uqadd v0.8b, v0.8b, v4.8b \n"
+ "uqadd v1.8b, v1.8b, v5.8b \n"
+ "uqadd v2.8b, v2.8b, v6.8b \n"
+ "uqadd v3.8b, v3.8b, v7.8b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
+ "b.gt 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
+}
+
+// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
+void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ // 8 pixel loop.
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "uqsub v0.8b, v0.8b, v4.8b \n"
+ "uqsub v1.8b, v1.8b, v5.8b \n"
+ "uqsub v2.8b, v2.8b, v6.8b \n"
+ "uqsub v3.8b, v3.8b, v7.8b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
+ "b.gt 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
+}
+
+// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
+// A = 255
+// R = Sobel
+// G = Sobel
+// B = Sobel
+void SobelRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "movi v3.8b, #255 \n" // alpha
+ // 8 pixel loop.
+ "1: \n"
+ "ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx.
+ "ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely.
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "uqadd v0.8b, v0.8b, v1.8b \n" // add
+ "orr v1.8b, v0.8b, v0.8b \n"
+ "orr v2.8b, v0.8b, v0.8b \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
+ "b.gt 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3");
+}
+
+// Adds Sobel X and Sobel Y and stores Sobel into plane.
+void SobelToPlaneRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
+ int width) {
+ asm volatile(
+ // 16 pixel loop.
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx.
+ "ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely.
+ "subs %w3, %w3, #16 \n" // 16 processed per loop.
+ "uqadd v0.16b, v0.16b, v1.16b \n" // add
+ "st1 {v0.16b}, [%2], #16 \n" // store 16 pixels.
+ "b.gt 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_y), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1");
+}
+
+// Mixes Sobel X, Sobel Y and Sobel into ARGB.
+// A = 255
+// R = Sobel X
+// G = Sobel
+// B = Sobel Y
+void SobelXYRow_NEON(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ asm volatile(
+ "movi v3.8b, #255 \n" // alpha
+ // 8 pixel loop.
+ "1: \n"
+ "ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx.
+ "ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely.
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "uqadd v1.8b, v0.8b, v2.8b \n" // add
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB
+ "b.gt 1b \n"
+ : "+r"(src_sobelx), // %0
+ "+r"(src_sobely), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3");
+}
+
+// SobelX as a matrix is
+// -1 0 1
+// -2 0 2
+// -1 0 1
+void SobelXRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.8b}, [%0],%5 \n" // top
+ "ld1 {v1.8b}, [%0],%6 \n"
+ "usubl v0.8h, v0.8b, v1.8b \n"
+ "ld1 {v2.8b}, [%1],%5 \n" // center * 2
+ "ld1 {v3.8b}, [%1],%6 \n"
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "ld1 {v2.8b}, [%2],%5 \n" // bottom
+ "ld1 {v3.8b}, [%2],%6 \n"
+ "subs %w4, %w4, #8 \n" // 8 pixels
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "abs v0.8h, v0.8h \n"
+ "uqxtn v0.8b, v0.8h \n"
+ "st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx
+ "b.gt 1b \n"
+ : "+r"(src_y0), // %0
+ "+r"(src_y1), // %1
+ "+r"(src_y2), // %2
+ "+r"(dst_sobelx), // %3
+ "+r"(width) // %4
+ : "r"(2LL), // %5
+ "r"(6LL) // %6
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+// SobelY as a matrix is
+// -1 -2 -1
+// 0 0 0
+// 1 2 1
+void SobelYRow_NEON(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.8b}, [%0],%4 \n" // left
+ "ld1 {v1.8b}, [%1],%4 \n"
+ "usubl v0.8h, v0.8b, v1.8b \n"
+ "ld1 {v2.8b}, [%0],%4 \n" // center * 2
+ "ld1 {v3.8b}, [%1],%4 \n"
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "ld1 {v2.8b}, [%0],%5 \n" // right
+ "ld1 {v3.8b}, [%1],%5 \n"
+ "subs %w3, %w3, #8 \n" // 8 pixels
+ "usubl v1.8h, v2.8b, v3.8b \n"
+ "add v0.8h, v0.8h, v1.8h \n"
+ "abs v0.8h, v0.8h \n"
+ "uqxtn v0.8b, v0.8h \n"
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 sobely
+ "b.gt 1b \n"
+ : "+r"(src_y0), // %0
+ "+r"(src_y1), // %1
+ "+r"(dst_sobely), // %2
+ "+r"(width) // %3
+ : "r"(1LL), // %4
+ "r"(6LL) // %5
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+// Caveat - rounds float to half float whereas scaling version truncates.
+void HalfFloat1Row_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float /*unused*/,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts
+ "subs %w2, %w2, #8 \n" // 8 pixels per loop
+ "uxtl v2.4s, v1.4h \n" // 8 int's
+ "uxtl2 v3.4s, v1.8h \n"
+ "scvtf v2.4s, v2.4s \n" // 8 floats
+ "scvtf v3.4s, v3.4s \n"
+ "fcvtn v1.4h, v2.4s \n" // 8 half floats
+ "fcvtn2 v1.8h, v3.4s \n"
+ "st1 {v1.16b}, [%1], #16 \n" // store 8 shorts
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v1", "v2", "v3");
+}
+
+void HalfFloatRow_NEON(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts
+ "subs %w2, %w2, #8 \n" // 8 pixels per loop
+ "uxtl v2.4s, v1.4h \n" // 8 int's
+ "uxtl2 v3.4s, v1.8h \n"
+ "scvtf v2.4s, v2.4s \n" // 8 floats
+ "scvtf v3.4s, v3.4s \n"
+ "fmul v2.4s, v2.4s, %3.s[0] \n" // adjust exponent
+ "fmul v3.4s, v3.4s, %3.s[0] \n"
+ "uqshrn v1.4h, v2.4s, #13 \n" // isolate halffloat
+ "uqshrn2 v1.8h, v3.4s, #13 \n"
+ "st1 {v1.16b}, [%1], #16 \n" // store 8 shorts
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "w"(scale * 1.9259299444e-34f) // %3
+ : "cc", "memory", "v1", "v2", "v3");
+}
+
+void ByteToFloatRow_NEON(const uint8_t* src,
+ float* dst,
+ float scale,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v1.8b}, [%0], #8 \n" // load 8 bytes
+ "subs %w2, %w2, #8 \n" // 8 pixels per loop
+ "uxtl v1.8h, v1.8b \n" // 8 shorts
+ "uxtl v2.4s, v1.4h \n" // 8 ints
+ "uxtl2 v3.4s, v1.8h \n"
+ "scvtf v2.4s, v2.4s \n" // 8 floats
+ "scvtf v3.4s, v3.4s \n"
+ "fmul v2.4s, v2.4s, %3.s[0] \n" // scale
+ "fmul v3.4s, v3.4s, %3.s[0] \n"
+ "st1 {v2.16b, v3.16b}, [%1], #32 \n" // store 8 floats
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "w"(scale) // %3
+ : "cc", "memory", "v1", "v2", "v3");
+}
+
+float ScaleMaxSamples_NEON(const float* src,
+ float* dst,
+ float scale,
+ int width) {
+ float fmax;
+ asm volatile(
+ "movi v5.4s, #0 \n" // max
+ "movi v6.4s, #0 \n"
+
+ "1: \n"
+ "ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+ "fmul v3.4s, v1.4s, %4.s[0] \n" // scale
+ "fmul v4.4s, v2.4s, %4.s[0] \n" // scale
+ "fmax v5.4s, v5.4s, v1.4s \n" // max
+ "fmax v6.4s, v6.4s, v2.4s \n"
+ "st1 {v3.4s, v4.4s}, [%1], #32 \n" // store 8 samples
+ "b.gt 1b \n"
+ "fmax v5.4s, v5.4s, v6.4s \n" // max
+ "fmaxv %s3, v5.4s \n" // signed max acculator
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width), // %2
+ "=w"(fmax) // %3
+ : "w"(scale) // %4
+ : "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6");
+ return fmax;
+}
+
+float ScaleSumSamples_NEON(const float* src,
+ float* dst,
+ float scale,
+ int width) {
+ float fsum;
+ asm volatile(
+ "movi v5.4s, #0 \n" // max
+ "movi v6.4s, #0 \n" // max
+
+ "1: \n"
+ "ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+ "fmul v3.4s, v1.4s, %4.s[0] \n" // scale
+ "fmul v4.4s, v2.4s, %4.s[0] \n"
+ "fmla v5.4s, v1.4s, v1.4s \n" // sum of squares
+ "fmla v6.4s, v2.4s, v2.4s \n"
+ "st1 {v3.4s, v4.4s}, [%1], #32 \n" // store 8 samples
+ "b.gt 1b \n"
+ "faddp v5.4s, v5.4s, v6.4s \n"
+ "faddp v5.4s, v5.4s, v5.4s \n"
+ "faddp %3.4s, v5.4s, v5.4s \n" // sum
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width), // %2
+ "=w"(fsum) // %3
+ : "w"(scale) // %4
+ : "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6");
+ return fsum;
+}
+
+void ScaleSamples_NEON(const float* src, float* dst, float scale, int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+ "fmul v1.4s, v1.4s, %3.s[0] \n" // scale
+ "fmul v2.4s, v2.4s, %3.s[0] \n" // scale
+ "st1 {v1.4s, v2.4s}, [%1], #32 \n" // store 8 samples
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "w"(scale) // %3
+ : "cc", "memory", "v1", "v2");
+}
+
+// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
+void GaussCol_NEON(const uint16_t* src0,
+ const uint16_t* src1,
+ const uint16_t* src2,
+ const uint16_t* src3,
+ const uint16_t* src4,
+ uint32_t* dst,
+ int width) {
+ asm volatile(
+ "movi v6.8h, #4 \n" // constant 4
+ "movi v7.8h, #6 \n" // constant 6
+
+ "1: \n"
+ "ld1 {v1.8h}, [%0], #16 \n" // load 8 samples, 5 rows
+ "ld1 {v2.8h}, [%4], #16 \n"
+ "uaddl v0.4s, v1.4h, v2.4h \n" // * 1
+ "uaddl2 v1.4s, v1.8h, v2.8h \n" // * 1
+ "ld1 {v2.8h}, [%1], #16 \n"
+ "umlal v0.4s, v2.4h, v6.4h \n" // * 4
+ "umlal2 v1.4s, v2.8h, v6.8h \n" // * 4
+ "ld1 {v2.8h}, [%2], #16 \n"
+ "umlal v0.4s, v2.4h, v7.4h \n" // * 6
+ "umlal2 v1.4s, v2.8h, v7.8h \n" // * 6
+ "ld1 {v2.8h}, [%3], #16 \n"
+ "umlal v0.4s, v2.4h, v6.4h \n" // * 4
+ "umlal2 v1.4s, v2.8h, v6.8h \n" // * 4
+ "subs %w6, %w6, #8 \n" // 8 processed per loop
+ "st1 {v0.4s,v1.4s}, [%5], #32 \n" // store 8 samples
+ "b.gt 1b \n"
+ : "+r"(src0), // %0
+ "+r"(src1), // %1
+ "+r"(src2), // %2
+ "+r"(src3), // %3
+ "+r"(src4), // %4
+ "+r"(dst), // %5
+ "+r"(width) // %6
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v6", "v7");
+}
+
+// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
+void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
+ const uint32_t* src1 = src + 1;
+ const uint32_t* src2 = src + 2;
+ const uint32_t* src3 = src + 3;
+ asm volatile(
+ "movi v6.4s, #4 \n" // constant 4
+ "movi v7.4s, #6 \n" // constant 6
+
+ "1: \n"
+ "ld1 {v0.4s,v1.4s,v2.4s}, [%0], %6 \n" // load 12 source samples
+ "add v0.4s, v0.4s, v1.4s \n" // * 1
+ "add v1.4s, v1.4s, v2.4s \n" // * 1
+ "ld1 {v2.4s,v3.4s}, [%2], #32 \n"
+ "mla v0.4s, v2.4s, v7.4s \n" // * 6
+ "mla v1.4s, v3.4s, v7.4s \n" // * 6
+ "ld1 {v2.4s,v3.4s}, [%1], #32 \n"
+ "ld1 {v4.4s,v5.4s}, [%3], #32 \n"
+ "add v2.4s, v2.4s, v4.4s \n" // add rows for * 4
+ "add v3.4s, v3.4s, v5.4s \n"
+ "mla v0.4s, v2.4s, v6.4s \n" // * 4
+ "mla v1.4s, v3.4s, v6.4s \n" // * 4
+ "subs %w5, %w5, #8 \n" // 8 processed per loop
+ "uqrshrn v0.4h, v0.4s, #8 \n" // round and pack
+ "uqrshrn2 v0.8h, v1.4s, #8 \n"
+ "st1 {v0.8h}, [%4], #16 \n" // store 8 samples
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(src1), // %1
+ "+r"(src2), // %2
+ "+r"(src3), // %3
+ "+r"(dst), // %4
+ "+r"(width) // %5
+ : "r"(32LL) // %6
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
+}
+
+#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_win.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_win.cc
new file mode 100644
index 0000000000..5500d7f5a6
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/row_win.cc
@@ -0,0 +1,6234 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+// This module is for Visual C 32/64 bit and clangcl 32 bit
+#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
+ (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
+
+#if defined(_M_X64)
+#include <emmintrin.h>
+#include <tmmintrin.h> // For _mm_maddubs_epi16
+#endif
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// 64 bit
+#if defined(_M_X64)
+
+// Read 4 UV from 422, upsample to 8 UV.
+#define READYUV422 \
+ xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf); \
+ xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \
+ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
+ xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
+ u_buf += 4; \
+ xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
+ xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
+ y_buf += 8;
+
+// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
+#define READYUVA422 \
+ xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf); \
+ xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \
+ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
+ xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
+ u_buf += 4; \
+ xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
+ xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
+ y_buf += 8; \
+ xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \
+ a_buf += 8;
+
+// Convert 8 pixels: 8 UV and 8 Y.
+#define YUVTORGB(yuvconstants) \
+ xmm1 = _mm_loadu_si128(&xmm0); \
+ xmm2 = _mm_loadu_si128(&xmm0); \
+ xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \
+ xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \
+ xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \
+ xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \
+ xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \
+ xmm2 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasR, xmm2); \
+ xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)yuvconstants->kYToRgb); \
+ xmm0 = _mm_adds_epi16(xmm0, xmm4); \
+ xmm1 = _mm_adds_epi16(xmm1, xmm4); \
+ xmm2 = _mm_adds_epi16(xmm2, xmm4); \
+ xmm0 = _mm_srai_epi16(xmm0, 6); \
+ xmm1 = _mm_srai_epi16(xmm1, 6); \
+ xmm2 = _mm_srai_epi16(xmm2, 6); \
+ xmm0 = _mm_packus_epi16(xmm0, xmm0); \
+ xmm1 = _mm_packus_epi16(xmm1, xmm1); \
+ xmm2 = _mm_packus_epi16(xmm2, xmm2);
+
+// Store 8 ARGB values.
+#define STOREARGB \
+ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
+ xmm2 = _mm_unpacklo_epi8(xmm2, xmm5); \
+ xmm1 = _mm_loadu_si128(&xmm0); \
+ xmm0 = _mm_unpacklo_epi16(xmm0, xmm2); \
+ xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); \
+ _mm_storeu_si128((__m128i*)dst_argb, xmm0); \
+ _mm_storeu_si128((__m128i*)(dst_argb + 16), xmm1); \
+ dst_argb += 32;
+
+#if defined(HAS_I422TOARGBROW_SSSE3)
+void I422ToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __m128i xmm0, xmm1, xmm2, xmm4;
+ const __m128i xmm5 = _mm_set1_epi8(-1);
+ const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf;
+ while (width > 0) {
+ READYUV422
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ width -= 8;
+ }
+}
+#endif
+
+#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
+void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
+ const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf;
+ while (width > 0) {
+ READYUVA422
+ YUVTORGB(yuvconstants)
+ STOREARGB
+ width -= 8;
+ }
+}
+#endif
+
+// 32 bit
+#else // defined(_M_X64)
+#ifdef HAS_ARGBTOYROW_SSSE3
+
+// Constants for ARGB.
+static const vec8 kARGBToY = {13, 65, 33, 0, 13, 65, 33, 0,
+ 13, 65, 33, 0, 13, 65, 33, 0};
+
+// JPeg full range.
+static const vec8 kARGBToYJ = {15, 75, 38, 0, 15, 75, 38, 0,
+ 15, 75, 38, 0, 15, 75, 38, 0};
+
+static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0,
+ 112, -74, -38, 0, 112, -74, -38, 0};
+
+static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0,
+ 127, -84, -43, 0, 127, -84, -43, 0};
+
+static const vec8 kARGBToV = {
+ -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
+};
+
+static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0,
+ -20, -107, 127, 0, -20, -107, 127, 0};
+
+// vpshufb for vphaddw + vpackuswb packed to shorts.
+static const lvec8 kShufARGBToUV_AVX = {
+ 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
+ 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15};
+
+// Constants for BGRA.
+static const vec8 kBGRAToY = {0, 33, 65, 13, 0, 33, 65, 13,
+ 0, 33, 65, 13, 0, 33, 65, 13};
+
+static const vec8 kBGRAToU = {0, -38, -74, 112, 0, -38, -74, 112,
+ 0, -38, -74, 112, 0, -38, -74, 112};
+
+static const vec8 kBGRAToV = {0, 112, -94, -18, 0, 112, -94, -18,
+ 0, 112, -94, -18, 0, 112, -94, -18};
+
+// Constants for ABGR.
+static const vec8 kABGRToY = {33, 65, 13, 0, 33, 65, 13, 0,
+ 33, 65, 13, 0, 33, 65, 13, 0};
+
+static const vec8 kABGRToU = {-38, -74, 112, 0, -38, -74, 112, 0,
+ -38, -74, 112, 0, -38, -74, 112, 0};
+
+static const vec8 kABGRToV = {112, -94, -18, 0, 112, -94, -18, 0,
+ 112, -94, -18, 0, 112, -94, -18, 0};
+
+// Constants for RGBA.
+static const vec8 kRGBAToY = {0, 13, 65, 33, 0, 13, 65, 33,
+ 0, 13, 65, 33, 0, 13, 65, 33};
+
+static const vec8 kRGBAToU = {0, 112, -74, -38, 0, 112, -74, -38,
+ 0, 112, -74, -38, 0, 112, -74, -38};
+
+static const vec8 kRGBAToV = {0, -18, -94, 112, 0, -18, -94, 112,
+ 0, -18, -94, 112, 0, -18, -94, 112};
+
+static const uvec8 kAddY16 = {16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
+ 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u};
+
+// 7 bit fixed point 0.5.
+static const vec16 kAddYJ64 = {64, 64, 64, 64, 64, 64, 64, 64};
+
+static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
+
+static const uvec16 kAddUVJ128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u,
+ 0x8080u, 0x8080u, 0x8080u, 0x8080u};
+
+// Shuffle table for converting RGB24 to ARGB.
+static const uvec8 kShuffleMaskRGB24ToARGB = {
+ 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u};
+
+// Shuffle table for converting RAW to ARGB.
+static const uvec8 kShuffleMaskRAWToARGB = {2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u,
+ 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u};
+
+// Shuffle table for converting RAW to RGB24. First 8.
+static const uvec8 kShuffleMaskRAWToRGB24_0 = {
+ 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
+
+// Shuffle table for converting RAW to RGB24. Middle 8.
+static const uvec8 kShuffleMaskRAWToRGB24_1 = {
+ 2u, 7u, 6u, 5u, 10u, 9u, 8u, 13u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
+
+// Shuffle table for converting RAW to RGB24. Last 8.
+static const uvec8 kShuffleMaskRAWToRGB24_2 = {
+ 8u, 7u, 12u, 11u, 10u, 15u, 14u, 13u,
+ 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
+
+// Shuffle table for converting ARGB to RGB24.
+static const uvec8 kShuffleMaskARGBToRGB24 = {
+ 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u};
+
+// Shuffle table for converting ARGB to RAW.
+static const uvec8 kShuffleMaskARGBToRAW = {
+ 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u};
+
+// Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4
+static const uvec8 kShuffleMaskARGBToRGB24_0 = {
+ 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u};
+
+// YUY2 shuf 16 Y to 32 Y.
+static const lvec8 kShuffleYUY2Y = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10,
+ 10, 12, 12, 14, 14, 0, 0, 2, 2, 4, 4,
+ 6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
+
+// YUY2 shuf 8 UV to 16 UV.
+static const lvec8 kShuffleYUY2UV = {1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9,
+ 11, 13, 15, 13, 15, 1, 3, 1, 3, 5, 7,
+ 5, 7, 9, 11, 9, 11, 13, 15, 13, 15};
+
+// UYVY shuf 16 Y to 32 Y.
+static const lvec8 kShuffleUYVYY = {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11,
+ 11, 13, 13, 15, 15, 1, 1, 3, 3, 5, 5,
+ 7, 7, 9, 9, 11, 11, 13, 13, 15, 15};
+
+// UYVY shuf 8 UV to 16 UV.
+static const lvec8 kShuffleUYVYUV = {0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8,
+ 10, 12, 14, 12, 14, 0, 2, 0, 2, 4, 6,
+ 4, 6, 8, 10, 8, 10, 12, 14, 12, 14};
+
+// NV21 shuf 8 VU to 16 UV.
+static const lvec8 kShuffleNV21 = {
+ 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
+ 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
+};
+
+// Duplicates gray value 3 times and fills in alpha opaque.
+__declspec(naked) void J400ToARGBRow_SSE2(const uint8_t* src_y,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_y
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ pcmpeqb xmm5, xmm5 // generate mask 0xff000000
+ pslld xmm5, 24
+
+ convertloop:
+ movq xmm0, qword ptr [eax]
+ lea eax, [eax + 8]
+ punpcklbw xmm0, xmm0
+ movdqa xmm1, xmm0
+ punpcklwd xmm0, xmm0
+ punpckhwd xmm1, xmm1
+ por xmm0, xmm5
+ por xmm1, xmm5
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg convertloop
+ ret
+ }
+}
+
+#ifdef HAS_J400TOARGBROW_AVX2
+// Duplicates gray value 3 times and fills in alpha opaque.
+__declspec(naked) void J400ToARGBRow_AVX2(const uint8_t* src_y,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_y
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000
+ vpslld ymm5, ymm5, 24
+
+ convertloop:
+ vmovdqu xmm0, [eax]
+ lea eax, [eax + 16]
+ vpermq ymm0, ymm0, 0xd8
+ vpunpcklbw ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8
+ vpunpckhwd ymm1, ymm0, ymm0
+ vpunpcklwd ymm0, ymm0, ymm0
+ vpor ymm0, ymm0, ymm5
+ vpor ymm1, ymm1, ymm5
+ vmovdqu [edx], ymm0
+ vmovdqu [edx + 32], ymm1
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_J400TOARGBROW_AVX2
+
+__declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_rgb24
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ pcmpeqb xmm5, xmm5 // generate mask 0xff000000
+ pslld xmm5, 24
+ movdqa xmm4, xmmword ptr kShuffleMaskRGB24ToARGB
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm3, [eax + 32]
+ lea eax, [eax + 48]
+ movdqa xmm2, xmm3
+ palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
+ pshufb xmm2, xmm4
+ por xmm2, xmm5
+ palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
+ pshufb xmm0, xmm4
+ movdqu [edx + 32], xmm2
+ por xmm0, xmm5
+ pshufb xmm1, xmm4
+ movdqu [edx], xmm0
+ por xmm1, xmm5
+ palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]}
+ pshufb xmm3, xmm4
+ movdqu [edx + 16], xmm1
+ por xmm3, xmm5
+ movdqu [edx + 48], xmm3
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void RAWToARGBRow_SSSE3(const uint8_t* src_raw,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_raw
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ pcmpeqb xmm5, xmm5 // generate mask 0xff000000
+ pslld xmm5, 24
+ movdqa xmm4, xmmword ptr kShuffleMaskRAWToARGB
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm3, [eax + 32]
+ lea eax, [eax + 48]
+ movdqa xmm2, xmm3
+ palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
+ pshufb xmm2, xmm4
+ por xmm2, xmm5
+ palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
+ pshufb xmm0, xmm4
+ movdqu [edx + 32], xmm2
+ por xmm0, xmm5
+ pshufb xmm1, xmm4
+ movdqu [edx], xmm0
+ por xmm1, xmm5
+ palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]}
+ pshufb xmm3, xmm4
+ movdqu [edx + 16], xmm1
+ por xmm3, xmm5
+ movdqu [edx + 48], xmm3
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void RAWToRGB24Row_SSSE3(const uint8_t* src_raw,
+ uint8_t* dst_rgb24,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_raw
+ mov edx, [esp + 8] // dst_rgb24
+ mov ecx, [esp + 12] // width
+ movdqa xmm3, xmmword ptr kShuffleMaskRAWToRGB24_0
+ movdqa xmm4, xmmword ptr kShuffleMaskRAWToRGB24_1
+ movdqa xmm5, xmmword ptr kShuffleMaskRAWToRGB24_2
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 4]
+ movdqu xmm2, [eax + 8]
+ lea eax, [eax + 24]
+ pshufb xmm0, xmm3
+ pshufb xmm1, xmm4
+ pshufb xmm2, xmm5
+ movq qword ptr [edx], xmm0
+ movq qword ptr [edx + 8], xmm1
+ movq qword ptr [edx + 16], xmm2
+ lea edx, [edx + 24]
+ sub ecx, 8
+ jg convertloop
+ ret
+ }
+}
+
+// pmul method to replicate bits.
+// Math to replicate bits:
+// (v << 8) | (v << 3)
+// v * 256 + v * 8
+// v * (256 + 8)
+// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
+// 20 instructions.
+__declspec(naked) void RGB565ToARGBRow_SSE2(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, 0x01080108 // generate multiplier to repeat 5 bits
+ movd xmm5, eax
+ pshufd xmm5, xmm5, 0
+ mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits
+ movd xmm6, eax
+ pshufd xmm6, xmm6, 0
+ pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red
+ psllw xmm3, 11
+ pcmpeqb xmm4, xmm4 // generate mask 0x07e007e0 for Green
+ psllw xmm4, 10
+ psrlw xmm4, 5
+ pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha
+ psllw xmm7, 8
+
+ mov eax, [esp + 4] // src_rgb565
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ sub edx, eax
+
+ convertloop:
+ movdqu xmm0, [eax] // fetch 8 pixels of bgr565
+ movdqa xmm1, xmm0
+ movdqa xmm2, xmm0
+ pand xmm1, xmm3 // R in upper 5 bits
+ psllw xmm2, 11 // B in upper 5 bits
+ pmulhuw xmm1, xmm5 // * (256 + 8)
+ pmulhuw xmm2, xmm5 // * (256 + 8)
+ psllw xmm1, 8
+ por xmm1, xmm2 // RB
+ pand xmm0, xmm4 // G in middle 6 bits
+ pmulhuw xmm0, xmm6 // << 5 * (256 + 4)
+ por xmm0, xmm7 // AG
+ movdqa xmm2, xmm1
+ punpcklbw xmm1, xmm0
+ punpckhbw xmm2, xmm0
+ movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
+ movdqu [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
+ lea eax, [eax + 16]
+ sub ecx, 8
+ jg convertloop
+ ret
+ }
+}
+
+#ifdef HAS_RGB565TOARGBROW_AVX2
+// pmul method to replicate bits.
+// Math to replicate bits:
+// (v << 8) | (v << 3)
+// v * 256 + v * 8
+// v * (256 + 8)
+// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
+__declspec(naked) void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, 0x01080108 // generate multiplier to repeat 5 bits
+ vmovd xmm5, eax
+ vbroadcastss ymm5, xmm5
+ mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits
+ vmovd xmm6, eax
+ vbroadcastss ymm6, xmm6
+ vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red
+ vpsllw ymm3, ymm3, 11
+ vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x07e007e0 for Green
+ vpsllw ymm4, ymm4, 10
+ vpsrlw ymm4, ymm4, 5
+ vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha
+ vpsllw ymm7, ymm7, 8
+
+ mov eax, [esp + 4] // src_rgb565
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ sub edx, eax
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 16 pixels of bgr565
+ vpand ymm1, ymm0, ymm3 // R in upper 5 bits
+ vpsllw ymm2, ymm0, 11 // B in upper 5 bits
+ vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8)
+ vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8)
+ vpsllw ymm1, ymm1, 8
+ vpor ymm1, ymm1, ymm2 // RB
+ vpand ymm0, ymm0, ymm4 // G in middle 6 bits
+ vpmulhuw ymm0, ymm0, ymm6 // << 5 * (256 + 4)
+ vpor ymm0, ymm0, ymm7 // AG
+ vpermq ymm0, ymm0, 0xd8 // mutate for unpack
+ vpermq ymm1, ymm1, 0xd8
+ vpunpckhbw ymm2, ymm1, ymm0
+ vpunpcklbw ymm1, ymm1, ymm0
+ vmovdqu [eax * 2 + edx], ymm1 // store 4 pixels of ARGB
+ vmovdqu [eax * 2 + edx + 32], ymm2 // store next 4 pixels of ARGB
+ lea eax, [eax + 32]
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_RGB565TOARGBROW_AVX2
+
+#ifdef HAS_ARGB1555TOARGBROW_AVX2
+__declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, 0x01080108 // generate multiplier to repeat 5 bits
+ vmovd xmm5, eax
+ vbroadcastss ymm5, xmm5
+ mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits
+ vmovd xmm6, eax
+ vbroadcastss ymm6, xmm6
+ vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red
+ vpsllw ymm3, ymm3, 11
+ vpsrlw ymm4, ymm3, 6 // generate mask 0x03e003e0 for Green
+ vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha
+ vpsllw ymm7, ymm7, 8
+
+ mov eax, [esp + 4] // src_argb1555
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ sub edx, eax
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 16 pixels of 1555
+ vpsllw ymm1, ymm0, 1 // R in upper 5 bits
+ vpsllw ymm2, ymm0, 11 // B in upper 5 bits
+ vpand ymm1, ymm1, ymm3
+ vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8)
+ vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8)
+ vpsllw ymm1, ymm1, 8
+ vpor ymm1, ymm1, ymm2 // RB
+ vpsraw ymm2, ymm0, 8 // A
+ vpand ymm0, ymm0, ymm4 // G in middle 5 bits
+ vpmulhuw ymm0, ymm0, ymm6 // << 6 * (256 + 8)
+ vpand ymm2, ymm2, ymm7
+ vpor ymm0, ymm0, ymm2 // AG
+ vpermq ymm0, ymm0, 0xd8 // mutate for unpack
+ vpermq ymm1, ymm1, 0xd8
+ vpunpckhbw ymm2, ymm1, ymm0
+ vpunpcklbw ymm1, ymm1, ymm0
+ vmovdqu [eax * 2 + edx], ymm1 // store 8 pixels of ARGB
+ vmovdqu [eax * 2 + edx + 32], ymm2 // store next 8 pixels of ARGB
+ lea eax, [eax + 32]
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGB1555TOARGBROW_AVX2
+
+#ifdef HAS_ARGB4444TOARGBROW_AVX2
+__declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f
+ vmovd xmm4, eax
+ vbroadcastss ymm4, xmm4
+ vpslld ymm5, ymm4, 4 // 0xf0f0f0f0 for high nibbles
+ mov eax, [esp + 4] // src_argb4444
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ sub edx, eax
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 16 pixels of bgra4444
+ vpand ymm2, ymm0, ymm5 // mask high nibbles
+ vpand ymm0, ymm0, ymm4 // mask low nibbles
+ vpsrlw ymm3, ymm2, 4
+ vpsllw ymm1, ymm0, 4
+ vpor ymm2, ymm2, ymm3
+ vpor ymm0, ymm0, ymm1
+ vpermq ymm0, ymm0, 0xd8 // mutate for unpack
+ vpermq ymm2, ymm2, 0xd8
+ vpunpckhbw ymm1, ymm0, ymm2
+ vpunpcklbw ymm0, ymm0, ymm2
+ vmovdqu [eax * 2 + edx], ymm0 // store 8 pixels of ARGB
+ vmovdqu [eax * 2 + edx + 32], ymm1 // store next 8 pixels of ARGB
+ lea eax, [eax + 32]
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGB4444TOARGBROW_AVX2
+
+// 24 instructions
+__declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8_t* src_argb1555,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, 0x01080108 // generate multiplier to repeat 5 bits
+ movd xmm5, eax
+ pshufd xmm5, xmm5, 0
+ mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits
+ movd xmm6, eax
+ pshufd xmm6, xmm6, 0
+ pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red
+ psllw xmm3, 11
+ movdqa xmm4, xmm3 // generate mask 0x03e003e0 for Green
+ psrlw xmm4, 6
+ pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha
+ psllw xmm7, 8
+
+ mov eax, [esp + 4] // src_argb1555
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ sub edx, eax
+
+ convertloop:
+ movdqu xmm0, [eax] // fetch 8 pixels of 1555
+ movdqa xmm1, xmm0
+ movdqa xmm2, xmm0
+ psllw xmm1, 1 // R in upper 5 bits
+ psllw xmm2, 11 // B in upper 5 bits
+ pand xmm1, xmm3
+ pmulhuw xmm2, xmm5 // * (256 + 8)
+ pmulhuw xmm1, xmm5 // * (256 + 8)
+ psllw xmm1, 8
+ por xmm1, xmm2 // RB
+ movdqa xmm2, xmm0
+ pand xmm0, xmm4 // G in middle 5 bits
+ psraw xmm2, 8 // A
+ pmulhuw xmm0, xmm6 // << 6 * (256 + 8)
+ pand xmm2, xmm7
+ por xmm0, xmm2 // AG
+ movdqa xmm2, xmm1
+ punpcklbw xmm1, xmm0
+ punpckhbw xmm2, xmm0
+ movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
+ movdqu [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
+ lea eax, [eax + 16]
+ sub ecx, 8
+ jg convertloop
+ ret
+ }
+}
+
+// 18 instructions.
+__declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8_t* src_argb4444,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f
+ movd xmm4, eax
+ pshufd xmm4, xmm4, 0
+ movdqa xmm5, xmm4 // 0xf0f0f0f0 for high nibbles
+ pslld xmm5, 4
+ mov eax, [esp + 4] // src_argb4444
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ sub edx, eax
+
+ convertloop:
+ movdqu xmm0, [eax] // fetch 8 pixels of bgra4444
+ movdqa xmm2, xmm0
+ pand xmm0, xmm4 // mask low nibbles
+ pand xmm2, xmm5 // mask high nibbles
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ psllw xmm1, 4
+ psrlw xmm3, 4
+ por xmm0, xmm1
+ por xmm2, xmm3
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm2
+ punpckhbw xmm1, xmm2
+ movdqu [eax * 2 + edx], xmm0 // store 4 pixels of ARGB
+ movdqu [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB
+ lea eax, [eax + 16]
+ sub ecx, 8
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
+
+ convertloop:
+ movdqu xmm0, [eax] // fetch 16 pixels of argb
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+ lea eax, [eax + 64]
+ pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB
+ pshufb xmm1, xmm6
+ pshufb xmm2, xmm6
+ pshufb xmm3, xmm6
+ movdqa xmm4, xmm1 // 4 bytes from 1 for 0
+ psrldq xmm1, 4 // 8 bytes from 1
+ pslldq xmm4, 12 // 4 bytes from 1 for 0
+ movdqa xmm5, xmm2 // 8 bytes from 2 for 1
+ por xmm0, xmm4 // 4 bytes from 1 for 0
+ pslldq xmm5, 8 // 8 bytes from 2 for 1
+ movdqu [edx], xmm0 // store 0
+ por xmm1, xmm5 // 8 bytes from 2 for 1
+ psrldq xmm2, 8 // 4 bytes from 2
+ pslldq xmm3, 4 // 12 bytes from 3 for 2
+ por xmm2, xmm3 // 12 bytes from 3 for 2
+ movdqu [edx + 16], xmm1 // store 1
+ movdqu [edx + 32], xmm2 // store 2
+ lea edx, [edx + 48]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void ARGBToRAWRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
+
+ convertloop:
+ movdqu xmm0, [eax] // fetch 16 pixels of argb
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+ lea eax, [eax + 64]
+ pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB
+ pshufb xmm1, xmm6
+ pshufb xmm2, xmm6
+ pshufb xmm3, xmm6
+ movdqa xmm4, xmm1 // 4 bytes from 1 for 0
+ psrldq xmm1, 4 // 8 bytes from 1
+ pslldq xmm4, 12 // 4 bytes from 1 for 0
+ movdqa xmm5, xmm2 // 8 bytes from 2 for 1
+ por xmm0, xmm4 // 4 bytes from 1 for 0
+ pslldq xmm5, 8 // 8 bytes from 2 for 1
+ movdqu [edx], xmm0 // store 0
+ por xmm1, xmm5 // 8 bytes from 2 for 1
+ psrldq xmm2, 8 // 4 bytes from 2
+ pslldq xmm3, 4 // 12 bytes from 3 for 2
+ por xmm2, xmm3 // 12 bytes from 3 for 2
+ movdqu [edx + 16], xmm1 // store 1
+ movdqu [edx + 32], xmm2 // store 2
+ lea edx, [edx + 48]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void ARGBToRGB565Row_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ pcmpeqb xmm3, xmm3 // generate mask 0x0000001f
+ psrld xmm3, 27
+ pcmpeqb xmm4, xmm4 // generate mask 0x000007e0
+ psrld xmm4, 26
+ pslld xmm4, 5
+ pcmpeqb xmm5, xmm5 // generate mask 0xfffff800
+ pslld xmm5, 11
+
+ convertloop:
+ movdqu xmm0, [eax] // fetch 4 pixels of argb
+ movdqa xmm1, xmm0 // B
+ movdqa xmm2, xmm0 // G
+ pslld xmm0, 8 // R
+ psrld xmm1, 3 // B
+ psrld xmm2, 5 // G
+ psrad xmm0, 16 // R
+ pand xmm1, xmm3 // B
+ pand xmm2, xmm4 // G
+ pand xmm0, xmm5 // R
+ por xmm1, xmm2 // BG
+ por xmm0, xmm1 // BGR
+ packssdw xmm0, xmm0
+ lea eax, [eax + 16]
+ movq qword ptr [edx], xmm0 // store 4 pixels of RGB565
+ lea edx, [edx + 8]
+ sub ecx, 4
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
+ int width) {
+ __asm {
+
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ movd xmm6, [esp + 12] // dither4
+ mov ecx, [esp + 16] // width
+ punpcklbw xmm6, xmm6 // make dither 16 bytes
+ movdqa xmm7, xmm6
+ punpcklwd xmm6, xmm6
+ punpckhwd xmm7, xmm7
+ pcmpeqb xmm3, xmm3 // generate mask 0x0000001f
+ psrld xmm3, 27
+ pcmpeqb xmm4, xmm4 // generate mask 0x000007e0
+ psrld xmm4, 26
+ pslld xmm4, 5
+ pcmpeqb xmm5, xmm5 // generate mask 0xfffff800
+ pslld xmm5, 11
+
+ convertloop:
+ movdqu xmm0, [eax] // fetch 4 pixels of argb
+ paddusb xmm0, xmm6 // add dither
+ movdqa xmm1, xmm0 // B
+ movdqa xmm2, xmm0 // G
+ pslld xmm0, 8 // R
+ psrld xmm1, 3 // B
+ psrld xmm2, 5 // G
+ psrad xmm0, 16 // R
+ pand xmm1, xmm3 // B
+ pand xmm2, xmm4 // G
+ pand xmm0, xmm5 // R
+ por xmm1, xmm2 // BG
+ por xmm0, xmm1 // BGR
+ packssdw xmm0, xmm0
+ lea eax, [eax + 16]
+ movq qword ptr [edx], xmm0 // store 4 pixels of RGB565
+ lea edx, [edx + 8]
+ sub ecx, 4
+ jg convertloop
+ ret
+ }
+}
+
+#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
+__declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ const uint32_t dither4,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ vbroadcastss xmm6, [esp + 12] // dither4
+ mov ecx, [esp + 16] // width
+ vpunpcklbw xmm6, xmm6, xmm6 // make dither 32 bytes
+ vpermq ymm6, ymm6, 0xd8
+ vpunpcklwd ymm6, ymm6, ymm6
+ vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f
+ vpsrld ymm3, ymm3, 27
+ vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0
+ vpsrld ymm4, ymm4, 26
+ vpslld ymm4, ymm4, 5
+ vpslld ymm5, ymm3, 11 // generate mask 0x0000f800
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 8 pixels of argb
+ vpaddusb ymm0, ymm0, ymm6 // add dither
+ vpsrld ymm2, ymm0, 5 // G
+ vpsrld ymm1, ymm0, 3 // B
+ vpsrld ymm0, ymm0, 8 // R
+ vpand ymm2, ymm2, ymm4 // G
+ vpand ymm1, ymm1, ymm3 // B
+ vpand ymm0, ymm0, ymm5 // R
+ vpor ymm1, ymm1, ymm2 // BG
+ vpor ymm0, ymm0, ymm1 // BGR
+ vpackusdw ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8
+ lea eax, [eax + 32]
+ vmovdqu [edx], xmm0 // store 8 pixels of RGB565
+ lea edx, [edx + 16]
+ sub ecx, 8
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTORGB565DITHERROW_AVX2
+
+// TODO(fbarchard): Improve sign extension/packing.
+__declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ pcmpeqb xmm4, xmm4 // generate mask 0x0000001f
+ psrld xmm4, 27
+ movdqa xmm5, xmm4 // generate mask 0x000003e0
+ pslld xmm5, 5
+ movdqa xmm6, xmm4 // generate mask 0x00007c00
+ pslld xmm6, 10
+ pcmpeqb xmm7, xmm7 // generate mask 0xffff8000
+ pslld xmm7, 15
+
+ convertloop:
+ movdqu xmm0, [eax] // fetch 4 pixels of argb
+ movdqa xmm1, xmm0 // B
+ movdqa xmm2, xmm0 // G
+ movdqa xmm3, xmm0 // R
+ psrad xmm0, 16 // A
+ psrld xmm1, 3 // B
+ psrld xmm2, 6 // G
+ psrld xmm3, 9 // R
+ pand xmm0, xmm7 // A
+ pand xmm1, xmm4 // B
+ pand xmm2, xmm5 // G
+ pand xmm3, xmm6 // R
+ por xmm0, xmm1 // BA
+ por xmm2, xmm3 // GR
+ por xmm0, xmm2 // BGRA
+ packssdw xmm0, xmm0
+ lea eax, [eax + 16]
+ movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555
+ lea edx, [edx + 8]
+ sub ecx, 4
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ pcmpeqb xmm4, xmm4 // generate mask 0xf000f000
+ psllw xmm4, 12
+ movdqa xmm3, xmm4 // generate mask 0x00f000f0
+ psrlw xmm3, 8
+
+ convertloop:
+ movdqu xmm0, [eax] // fetch 4 pixels of argb
+ movdqa xmm1, xmm0
+ pand xmm0, xmm3 // low nibble
+ pand xmm1, xmm4 // high nibble
+ psrld xmm0, 4
+ psrld xmm1, 8
+ por xmm0, xmm1
+ packuswb xmm0, xmm0
+ lea eax, [eax + 16]
+ movq qword ptr [edx], xmm0 // store 4 pixels of ARGB4444
+ lea edx, [edx + 8]
+ sub ecx, 4
+ jg convertloop
+ ret
+ }
+}
+
+#ifdef HAS_ARGBTORGB565ROW_AVX2
+__declspec(naked) void ARGBToRGB565Row_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f
+ vpsrld ymm3, ymm3, 27
+ vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0
+ vpsrld ymm4, ymm4, 26
+ vpslld ymm4, ymm4, 5
+ vpslld ymm5, ymm3, 11 // generate mask 0x0000f800
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 8 pixels of argb
+ vpsrld ymm2, ymm0, 5 // G
+ vpsrld ymm1, ymm0, 3 // B
+ vpsrld ymm0, ymm0, 8 // R
+ vpand ymm2, ymm2, ymm4 // G
+ vpand ymm1, ymm1, ymm3 // B
+ vpand ymm0, ymm0, ymm5 // R
+ vpor ymm1, ymm1, ymm2 // BG
+ vpor ymm0, ymm0, ymm1 // BGR
+ vpackusdw ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8
+ lea eax, [eax + 32]
+ vmovdqu [edx], xmm0 // store 8 pixels of RGB565
+ lea edx, [edx + 16]
+ sub ecx, 8
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTORGB565ROW_AVX2
+
+#ifdef HAS_ARGBTOARGB1555ROW_AVX2
+__declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm4, ymm4, ymm4
+ vpsrld ymm4, ymm4, 27 // generate mask 0x0000001f
+ vpslld ymm5, ymm4, 5 // generate mask 0x000003e0
+ vpslld ymm6, ymm4, 10 // generate mask 0x00007c00
+ vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xffff8000
+ vpslld ymm7, ymm7, 15
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 8 pixels of argb
+ vpsrld ymm3, ymm0, 9 // R
+ vpsrld ymm2, ymm0, 6 // G
+ vpsrld ymm1, ymm0, 3 // B
+ vpsrad ymm0, ymm0, 16 // A
+ vpand ymm3, ymm3, ymm6 // R
+ vpand ymm2, ymm2, ymm5 // G
+ vpand ymm1, ymm1, ymm4 // B
+ vpand ymm0, ymm0, ymm7 // A
+ vpor ymm0, ymm0, ymm1 // BA
+ vpor ymm2, ymm2, ymm3 // GR
+ vpor ymm0, ymm0, ymm2 // BGRA
+ vpackssdw ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8
+ lea eax, [eax + 32]
+ vmovdqu [edx], xmm0 // store 8 pixels of ARGB1555
+ lea edx, [edx + 16]
+ sub ecx, 8
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTOARGB1555ROW_AVX2
+
+#ifdef HAS_ARGBTOARGB4444ROW_AVX2
+__declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_rgb
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xf000f000
+ vpsllw ymm4, ymm4, 12
+ vpsrlw ymm3, ymm4, 8 // generate mask 0x00f000f0
+
+ convertloop:
+ vmovdqu ymm0, [eax] // fetch 8 pixels of argb
+ vpand ymm1, ymm0, ymm4 // high nibble
+ vpand ymm0, ymm0, ymm3 // low nibble
+ vpsrld ymm1, ymm1, 8
+ vpsrld ymm0, ymm0, 4
+ vpor ymm0, ymm0, ymm1
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8
+ lea eax, [eax + 32]
+ vmovdqu [edx], xmm0 // store 8 pixels of ARGB4444
+ lea edx, [edx + 16]
+ sub ecx, 8
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTOARGB4444ROW_AVX2
+
+// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
+__declspec(naked) void ARGBToYRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src_argb */
+ mov edx, [esp + 8] /* dst_y */
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kARGBToY
+ movdqa xmm5, xmmword ptr kAddY16
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+ pmaddubsw xmm0, xmm4
+ pmaddubsw xmm1, xmm4
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ lea eax, [eax + 64]
+ phaddw xmm0, xmm1
+ phaddw xmm2, xmm3
+ psrlw xmm0, 7
+ psrlw xmm2, 7
+ packuswb xmm0, xmm2
+ paddb xmm0, xmm5
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
+// Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
+__declspec(naked) void ARGBToYJRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src_argb */
+ mov edx, [esp + 8] /* dst_y */
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kARGBToYJ
+ movdqa xmm5, xmmword ptr kAddYJ64
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+ pmaddubsw xmm0, xmm4
+ pmaddubsw xmm1, xmm4
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ lea eax, [eax + 64]
+ phaddw xmm0, xmm1
+ phaddw xmm2, xmm3
+ paddw xmm0, xmm5 // Add .5 for rounding.
+ paddw xmm2, xmm5
+ psrlw xmm0, 7
+ psrlw xmm2, 7
+ packuswb xmm0, xmm2
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+#ifdef HAS_ARGBTOYROW_AVX2
+// vpermd for vphaddw + vpackuswb vpermd.
+static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
+
+// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
+__declspec(naked) void ARGBToYRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src_argb */
+ mov edx, [esp + 8] /* dst_y */
+ mov ecx, [esp + 12] /* width */
+ vbroadcastf128 ymm4, xmmword ptr kARGBToY
+ vbroadcastf128 ymm5, xmmword ptr kAddY16
+ vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ vmovdqu ymm2, [eax + 64]
+ vmovdqu ymm3, [eax + 96]
+ vpmaddubsw ymm0, ymm0, ymm4
+ vpmaddubsw ymm1, ymm1, ymm4
+ vpmaddubsw ymm2, ymm2, ymm4
+ vpmaddubsw ymm3, ymm3, ymm4
+ lea eax, [eax + 128]
+ vphaddw ymm0, ymm0, ymm1 // mutates.
+ vphaddw ymm2, ymm2, ymm3
+ vpsrlw ymm0, ymm0, 7
+ vpsrlw ymm2, ymm2, 7
+ vpackuswb ymm0, ymm0, ymm2 // mutates.
+ vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation.
+ vpaddb ymm0, ymm0, ymm5 // add 16 for Y
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTOYROW_AVX2
+
+#ifdef HAS_ARGBTOYJROW_AVX2
+// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
+__declspec(naked) void ARGBToYJRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src_argb */
+ mov edx, [esp + 8] /* dst_y */
+ mov ecx, [esp + 12] /* width */
+ vbroadcastf128 ymm4, xmmword ptr kARGBToYJ
+ vbroadcastf128 ymm5, xmmword ptr kAddYJ64
+ vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ vmovdqu ymm2, [eax + 64]
+ vmovdqu ymm3, [eax + 96]
+ vpmaddubsw ymm0, ymm0, ymm4
+ vpmaddubsw ymm1, ymm1, ymm4
+ vpmaddubsw ymm2, ymm2, ymm4
+ vpmaddubsw ymm3, ymm3, ymm4
+ lea eax, [eax + 128]
+ vphaddw ymm0, ymm0, ymm1 // mutates.
+ vphaddw ymm2, ymm2, ymm3
+ vpaddw ymm0, ymm0, ymm5 // Add .5 for rounding.
+ vpaddw ymm2, ymm2, ymm5
+ vpsrlw ymm0, ymm0, 7
+ vpsrlw ymm2, ymm2, 7
+ vpackuswb ymm0, ymm0, ymm2 // mutates.
+ vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation.
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg convertloop
+
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTOYJROW_AVX2
+
+__declspec(naked) void BGRAToYRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src_argb */
+ mov edx, [esp + 8] /* dst_y */
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kBGRAToY
+ movdqa xmm5, xmmword ptr kAddY16
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+ pmaddubsw xmm0, xmm4
+ pmaddubsw xmm1, xmm4
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ lea eax, [eax + 64]
+ phaddw xmm0, xmm1
+ phaddw xmm2, xmm3
+ psrlw xmm0, 7
+ psrlw xmm2, 7
+ packuswb xmm0, xmm2
+ paddb xmm0, xmm5
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void ABGRToYRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src_argb */
+ mov edx, [esp + 8] /* dst_y */
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kABGRToY
+ movdqa xmm5, xmmword ptr kAddY16
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+ pmaddubsw xmm0, xmm4
+ pmaddubsw xmm1, xmm4
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ lea eax, [eax + 64]
+ phaddw xmm0, xmm1
+ phaddw xmm2, xmm3
+ psrlw xmm0, 7
+ psrlw xmm2, 7
+ packuswb xmm0, xmm2
+ paddb xmm0, xmm5
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void RGBAToYRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src_argb */
+ mov edx, [esp + 8] /* dst_y */
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kRGBAToY
+ movdqa xmm5, xmmword ptr kAddY16
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+ pmaddubsw xmm0, xmm4
+ pmaddubsw xmm1, xmm4
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ lea eax, [eax + 64]
+ phaddw xmm0, xmm1
+ phaddw xmm2, xmm3
+ psrlw xmm0, 7
+ psrlw xmm2, 7
+ packuswb xmm0, xmm2
+ paddb xmm0, xmm5
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_argb
+ mov esi, [esp + 8 + 8] // src_stride_argb
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kARGBToV
+ movdqa xmm7, xmmword ptr kARGBToU
+ sub edi, edx // stride from u to v
+
+ convertloop:
+ /* step 1 - subsample 16x2 argb pixels to 8x1 */
+ movdqu xmm0, [eax]
+ movdqu xmm4, [eax + esi]
+ pavgb xmm0, xmm4
+ movdqu xmm1, [eax + 16]
+ movdqu xmm4, [eax + esi + 16]
+ pavgb xmm1, xmm4
+ movdqu xmm2, [eax + 32]
+ movdqu xmm4, [eax + esi + 32]
+ pavgb xmm2, xmm4
+ movdqu xmm3, [eax + 48]
+ movdqu xmm4, [eax + esi + 48]
+ pavgb xmm3, xmm4
+
+ lea eax, [eax + 64]
+ movdqa xmm4, xmm0
+ shufps xmm0, xmm1, 0x88
+ shufps xmm4, xmm1, 0xdd
+ pavgb xmm0, xmm4
+ movdqa xmm4, xmm2
+ shufps xmm2, xmm3, 0x88
+ shufps xmm4, xmm3, 0xdd
+ pavgb xmm2, xmm4
+
+ // step 2 - convert to U and V
+ // from here down is very similar to Y code except
+ // instead of 16 different pixels, its 8 pixels of U and 8 of V
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ pmaddubsw xmm0, xmm7 // U
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm1, xmm6 // V
+ pmaddubsw xmm3, xmm6
+ phaddw xmm0, xmm2
+ phaddw xmm1, xmm3
+ psraw xmm0, 8
+ psraw xmm1, 8
+ packsswb xmm0, xmm1
+ paddb xmm0, xmm5 // -> unsigned
+
+ // step 3 - store 8 U and 8 V values
+ movlps qword ptr [edx], xmm0 // U
+ movhps qword ptr [edx + edi], xmm0 // V
+ lea edx, [edx + 8]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_argb
+ mov esi, [esp + 8 + 8] // src_stride_argb
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ movdqa xmm5, xmmword ptr kAddUVJ128
+ movdqa xmm6, xmmword ptr kARGBToVJ
+ movdqa xmm7, xmmword ptr kARGBToUJ
+ sub edi, edx // stride from u to v
+
+ convertloop:
+ /* step 1 - subsample 16x2 argb pixels to 8x1 */
+ movdqu xmm0, [eax]
+ movdqu xmm4, [eax + esi]
+ pavgb xmm0, xmm4
+ movdqu xmm1, [eax + 16]
+ movdqu xmm4, [eax + esi + 16]
+ pavgb xmm1, xmm4
+ movdqu xmm2, [eax + 32]
+ movdqu xmm4, [eax + esi + 32]
+ pavgb xmm2, xmm4
+ movdqu xmm3, [eax + 48]
+ movdqu xmm4, [eax + esi + 48]
+ pavgb xmm3, xmm4
+
+ lea eax, [eax + 64]
+ movdqa xmm4, xmm0
+ shufps xmm0, xmm1, 0x88
+ shufps xmm4, xmm1, 0xdd
+ pavgb xmm0, xmm4
+ movdqa xmm4, xmm2
+ shufps xmm2, xmm3, 0x88
+ shufps xmm4, xmm3, 0xdd
+ pavgb xmm2, xmm4
+
+ // step 2 - convert to U and V
+ // from here down is very similar to Y code except
+ // instead of 16 different pixels, its 8 pixels of U and 8 of V
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ pmaddubsw xmm0, xmm7 // U
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm1, xmm6 // V
+ pmaddubsw xmm3, xmm6
+ phaddw xmm0, xmm2
+ phaddw xmm1, xmm3
+ paddw xmm0, xmm5 // +.5 rounding -> unsigned
+ paddw xmm1, xmm5
+ psraw xmm0, 8
+ psraw xmm1, 8
+ packsswb xmm0, xmm1
+
+ // step 3 - store 8 U and 8 V values
+ movlps qword ptr [edx], xmm0 // U
+ movhps qword ptr [edx + edi], xmm0 // V
+ lea edx, [edx + 8]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+#ifdef HAS_ARGBTOUVROW_AVX2
+__declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_argb
+ mov esi, [esp + 8 + 8] // src_stride_argb
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ vbroadcastf128 ymm5, xmmword ptr kAddUV128
+ vbroadcastf128 ymm6, xmmword ptr kARGBToV
+ vbroadcastf128 ymm7, xmmword ptr kARGBToU
+ sub edi, edx // stride from u to v
+
+ convertloop:
+ /* step 1 - subsample 32x2 argb pixels to 16x1 */
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ vmovdqu ymm2, [eax + 64]
+ vmovdqu ymm3, [eax + 96]
+ vpavgb ymm0, ymm0, [eax + esi]
+ vpavgb ymm1, ymm1, [eax + esi + 32]
+ vpavgb ymm2, ymm2, [eax + esi + 64]
+ vpavgb ymm3, ymm3, [eax + esi + 96]
+ lea eax, [eax + 128]
+ vshufps ymm4, ymm0, ymm1, 0x88
+ vshufps ymm0, ymm0, ymm1, 0xdd
+ vpavgb ymm0, ymm0, ymm4 // mutated by vshufps
+ vshufps ymm4, ymm2, ymm3, 0x88
+ vshufps ymm2, ymm2, ymm3, 0xdd
+ vpavgb ymm2, ymm2, ymm4 // mutated by vshufps
+
+ // step 2 - convert to U and V
+ // from here down is very similar to Y code except
+ // instead of 32 different pixels, its 16 pixels of U and 16 of V
+ vpmaddubsw ymm1, ymm0, ymm7 // U
+ vpmaddubsw ymm3, ymm2, ymm7
+ vpmaddubsw ymm0, ymm0, ymm6 // V
+ vpmaddubsw ymm2, ymm2, ymm6
+ vphaddw ymm1, ymm1, ymm3 // mutates
+ vphaddw ymm0, ymm0, ymm2
+ vpsraw ymm1, ymm1, 8
+ vpsraw ymm0, ymm0, 8
+ vpacksswb ymm0, ymm1, ymm0 // mutates
+ vpermq ymm0, ymm0, 0xd8 // For vpacksswb
+ vpshufb ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX // for vshufps/vphaddw
+ vpaddb ymm0, ymm0, ymm5 // -> unsigned
+
+ // step 3 - store 16 U and 16 V values
+ vextractf128 [edx], ymm0, 0 // U
+ vextractf128 [edx + edi], ymm0, 1 // V
+ lea edx, [edx + 16]
+ sub ecx, 32
+ jg convertloop
+
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTOUVROW_AVX2
+
+#ifdef HAS_ARGBTOUVJROW_AVX2
+__declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_argb
+ mov esi, [esp + 8 + 8] // src_stride_argb
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ vbroadcastf128 ymm5, xmmword ptr kAddUV128
+ vbroadcastf128 ymm6, xmmword ptr kARGBToV
+ vbroadcastf128 ymm7, xmmword ptr kARGBToU
+ sub edi, edx // stride from u to v
+
+ convertloop:
+ /* step 1 - subsample 32x2 argb pixels to 16x1 */
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ vmovdqu ymm2, [eax + 64]
+ vmovdqu ymm3, [eax + 96]
+ vpavgb ymm0, ymm0, [eax + esi]
+ vpavgb ymm1, ymm1, [eax + esi + 32]
+ vpavgb ymm2, ymm2, [eax + esi + 64]
+ vpavgb ymm3, ymm3, [eax + esi + 96]
+ lea eax, [eax + 128]
+ vshufps ymm4, ymm0, ymm1, 0x88
+ vshufps ymm0, ymm0, ymm1, 0xdd
+ vpavgb ymm0, ymm0, ymm4 // mutated by vshufps
+ vshufps ymm4, ymm2, ymm3, 0x88
+ vshufps ymm2, ymm2, ymm3, 0xdd
+ vpavgb ymm2, ymm2, ymm4 // mutated by vshufps
+
+ // step 2 - convert to U and V
+ // from here down is very similar to Y code except
+ // instead of 32 different pixels, its 16 pixels of U and 16 of V
+ vpmaddubsw ymm1, ymm0, ymm7 // U
+ vpmaddubsw ymm3, ymm2, ymm7
+ vpmaddubsw ymm0, ymm0, ymm6 // V
+ vpmaddubsw ymm2, ymm2, ymm6
+ vphaddw ymm1, ymm1, ymm3 // mutates
+ vphaddw ymm0, ymm0, ymm2
+ vpaddw ymm1, ymm1, ymm5 // +.5 rounding -> unsigned
+ vpaddw ymm0, ymm0, ymm5
+ vpsraw ymm1, ymm1, 8
+ vpsraw ymm0, ymm0, 8
+ vpacksswb ymm0, ymm1, ymm0 // mutates
+ vpermq ymm0, ymm0, 0xd8 // For vpacksswb
+ vpshufb ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX // for vshufps/vphaddw
+
+ // step 3 - store 16 U and 16 V values
+ vextractf128 [edx], ymm0, 0 // U
+ vextractf128 [edx + edi], ymm0, 1 // V
+ lea edx, [edx + 16]
+ sub ecx, 32
+ jg convertloop
+
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBTOUVJROW_AVX2
+
+__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb0,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push edi
+ mov eax, [esp + 4 + 4] // src_argb
+ mov edx, [esp + 4 + 8] // dst_u
+ mov edi, [esp + 4 + 12] // dst_v
+ mov ecx, [esp + 4 + 16] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kARGBToV
+ movdqa xmm7, xmmword ptr kARGBToU
+ sub edi, edx // stride from u to v
+
+ convertloop:
+ /* convert to U and V */
+ movdqu xmm0, [eax] // U
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+ pmaddubsw xmm0, xmm7
+ pmaddubsw xmm1, xmm7
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm3, xmm7
+ phaddw xmm0, xmm1
+ phaddw xmm2, xmm3
+ psraw xmm0, 8
+ psraw xmm2, 8
+ packsswb xmm0, xmm2
+ paddb xmm0, xmm5
+ movdqu [edx], xmm0
+
+ movdqu xmm0, [eax] // V
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+ pmaddubsw xmm0, xmm6
+ pmaddubsw xmm1, xmm6
+ pmaddubsw xmm2, xmm6
+ pmaddubsw xmm3, xmm6
+ phaddw xmm0, xmm1
+ phaddw xmm2, xmm3
+ psraw xmm0, 8
+ psraw xmm2, 8
+ packsswb xmm0, xmm2
+ paddb xmm0, xmm5
+ lea eax, [eax + 64]
+ movdqu [edx + edi], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ ret
+ }
+}
+
+__declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_argb
+ mov esi, [esp + 8 + 8] // src_stride_argb
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kBGRAToV
+ movdqa xmm7, xmmword ptr kBGRAToU
+ sub edi, edx // stride from u to v
+
+ convertloop:
+ /* step 1 - subsample 16x2 argb pixels to 8x1 */
+ movdqu xmm0, [eax]
+ movdqu xmm4, [eax + esi]
+ pavgb xmm0, xmm4
+ movdqu xmm1, [eax + 16]
+ movdqu xmm4, [eax + esi + 16]
+ pavgb xmm1, xmm4
+ movdqu xmm2, [eax + 32]
+ movdqu xmm4, [eax + esi + 32]
+ pavgb xmm2, xmm4
+ movdqu xmm3, [eax + 48]
+ movdqu xmm4, [eax + esi + 48]
+ pavgb xmm3, xmm4
+
+ lea eax, [eax + 64]
+ movdqa xmm4, xmm0
+ shufps xmm0, xmm1, 0x88
+ shufps xmm4, xmm1, 0xdd
+ pavgb xmm0, xmm4
+ movdqa xmm4, xmm2
+ shufps xmm2, xmm3, 0x88
+ shufps xmm4, xmm3, 0xdd
+ pavgb xmm2, xmm4
+
+ // step 2 - convert to U and V
+ // from here down is very similar to Y code except
+ // instead of 16 different pixels, its 8 pixels of U and 8 of V
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ pmaddubsw xmm0, xmm7 // U
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm1, xmm6 // V
+ pmaddubsw xmm3, xmm6
+ phaddw xmm0, xmm2
+ phaddw xmm1, xmm3
+ psraw xmm0, 8
+ psraw xmm1, 8
+ packsswb xmm0, xmm1
+ paddb xmm0, xmm5 // -> unsigned
+
+ // step 3 - store 8 U and 8 V values
+ movlps qword ptr [edx], xmm0 // U
+ movhps qword ptr [edx + edi], xmm0 // V
+ lea edx, [edx + 8]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+__declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_argb
+ mov esi, [esp + 8 + 8] // src_stride_argb
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kABGRToV
+ movdqa xmm7, xmmword ptr kABGRToU
+ sub edi, edx // stride from u to v
+
+ convertloop:
+ /* step 1 - subsample 16x2 argb pixels to 8x1 */
+ movdqu xmm0, [eax]
+ movdqu xmm4, [eax + esi]
+ pavgb xmm0, xmm4
+ movdqu xmm1, [eax + 16]
+ movdqu xmm4, [eax + esi + 16]
+ pavgb xmm1, xmm4
+ movdqu xmm2, [eax + 32]
+ movdqu xmm4, [eax + esi + 32]
+ pavgb xmm2, xmm4
+ movdqu xmm3, [eax + 48]
+ movdqu xmm4, [eax + esi + 48]
+ pavgb xmm3, xmm4
+
+ lea eax, [eax + 64]
+ movdqa xmm4, xmm0
+ shufps xmm0, xmm1, 0x88
+ shufps xmm4, xmm1, 0xdd
+ pavgb xmm0, xmm4
+ movdqa xmm4, xmm2
+ shufps xmm2, xmm3, 0x88
+ shufps xmm4, xmm3, 0xdd
+ pavgb xmm2, xmm4
+
+ // step 2 - convert to U and V
+ // from here down is very similar to Y code except
+ // instead of 16 different pixels, its 8 pixels of U and 8 of V
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ pmaddubsw xmm0, xmm7 // U
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm1, xmm6 // V
+ pmaddubsw xmm3, xmm6
+ phaddw xmm0, xmm2
+ phaddw xmm1, xmm3
+ psraw xmm0, 8
+ psraw xmm1, 8
+ packsswb xmm0, xmm1
+ paddb xmm0, xmm5 // -> unsigned
+
+ // step 3 - store 8 U and 8 V values
+ movlps qword ptr [edx], xmm0 // U
+ movhps qword ptr [edx + edi], xmm0 // V
+ lea edx, [edx + 8]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+__declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_argb
+ mov esi, [esp + 8 + 8] // src_stride_argb
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ movdqa xmm5, xmmword ptr kAddUV128
+ movdqa xmm6, xmmword ptr kRGBAToV
+ movdqa xmm7, xmmword ptr kRGBAToU
+ sub edi, edx // stride from u to v
+
+ convertloop:
+ /* step 1 - subsample 16x2 argb pixels to 8x1 */
+ movdqu xmm0, [eax]
+ movdqu xmm4, [eax + esi]
+ pavgb xmm0, xmm4
+ movdqu xmm1, [eax + 16]
+ movdqu xmm4, [eax + esi + 16]
+ pavgb xmm1, xmm4
+ movdqu xmm2, [eax + 32]
+ movdqu xmm4, [eax + esi + 32]
+ pavgb xmm2, xmm4
+ movdqu xmm3, [eax + 48]
+ movdqu xmm4, [eax + esi + 48]
+ pavgb xmm3, xmm4
+
+ lea eax, [eax + 64]
+ movdqa xmm4, xmm0
+ shufps xmm0, xmm1, 0x88
+ shufps xmm4, xmm1, 0xdd
+ pavgb xmm0, xmm4
+ movdqa xmm4, xmm2
+ shufps xmm2, xmm3, 0x88
+ shufps xmm4, xmm3, 0xdd
+ pavgb xmm2, xmm4
+
+ // step 2 - convert to U and V
+ // from here down is very similar to Y code except
+ // instead of 16 different pixels, its 8 pixels of U and 8 of V
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ pmaddubsw xmm0, xmm7 // U
+ pmaddubsw xmm2, xmm7
+ pmaddubsw xmm1, xmm6 // V
+ pmaddubsw xmm3, xmm6
+ phaddw xmm0, xmm2
+ phaddw xmm1, xmm3
+ psraw xmm0, 8
+ psraw xmm1, 8
+ packsswb xmm0, xmm1
+ paddb xmm0, xmm5 // -> unsigned
+
+ // step 3 - store 8 U and 8 V values
+ movlps qword ptr [edx], xmm0 // U
+ movhps qword ptr [edx + edi], xmm0 // V
+ lea edx, [edx + 8]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+#endif // HAS_ARGBTOYROW_SSSE3
+
+// Read 16 UV from 444
+#define READYUV444_AVX2 \
+ __asm { \
+ __asm vmovdqu xmm0, [esi] /* U */ \
+ __asm vmovdqu xmm1, [esi + edi] /* V */ \
+ __asm lea esi, [esi + 16] \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpermq ymm1, ymm1, 0xd8 \
+ __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16]}
+
+// Read 8 UV from 422, upsample to 16 UV.
+#define READYUV422_AVX2 \
+ __asm { \
+ __asm vmovq xmm0, qword ptr [esi] /* U */ \
+ __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \
+ __asm lea esi, [esi + 8] \
+ __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16]}
+
+// Read 8 UV from 422, upsample to 16 UV. With 16 Alpha.
+#define READYUVA422_AVX2 \
+ __asm { \
+ __asm vmovq xmm0, qword ptr [esi] /* U */ \
+ __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \
+ __asm lea esi, [esi + 8] \
+ __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16] \
+ __asm vmovdqu xmm5, [ebp] /* A */ \
+ __asm vpermq ymm5, ymm5, 0xd8 \
+ __asm lea ebp, [ebp + 16]}
+
+// Read 8 UV from NV12, upsample to 16 UV.
+#define READNV12_AVX2 \
+ __asm { \
+ __asm vmovdqu xmm0, [esi] /* UV */ \
+ __asm lea esi, [esi + 16] \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16]}
+
+// Read 8 UV from NV21, upsample to 16 UV.
+#define READNV21_AVX2 \
+ __asm { \
+ __asm vmovdqu xmm0, [esi] /* UV */ \
+ __asm lea esi, [esi + 16] \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleNV21 \
+ __asm vmovdqu xmm4, [eax] /* Y */ \
+ __asm vpermq ymm4, ymm4, 0xd8 \
+ __asm vpunpcklbw ymm4, ymm4, ymm4 \
+ __asm lea eax, [eax + 16]}
+
+// Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.
+#define READYUY2_AVX2 \
+ __asm { \
+ __asm vmovdqu ymm4, [eax] /* YUY2 */ \
+ __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleYUY2Y \
+ __asm vmovdqu ymm0, [eax] /* UV */ \
+ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleYUY2UV \
+ __asm lea eax, [eax + 32]}
+
+// Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV.
+#define READUYVY_AVX2 \
+ __asm { \
+ __asm vmovdqu ymm4, [eax] /* UYVY */ \
+ __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleUYVYY \
+ __asm vmovdqu ymm0, [eax] /* UV */ \
+ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleUYVYUV \
+ __asm lea eax, [eax + 32]}
+
+// Convert 16 pixels: 16 UV and 16 Y.
+#define YUVTORGB_AVX2(YuvConstants) \
+ __asm { \
+ __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\
+ __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\
+ __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\
+ __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \
+ __asm vpsubw ymm2, ymm3, ymm2 \
+ __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \
+ __asm vpsubw ymm1, ymm3, ymm1 \
+ __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \
+ __asm vpsubw ymm0, ymm3, ymm0 /* Step 2: Find Y contribution to 16 R,G,B values */ \
+ __asm vpmulhuw ymm4, ymm4, ymmword ptr [YuvConstants + KYTORGB] \
+ __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \
+ __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \
+ __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \
+ __asm vpsraw ymm0, ymm0, 6 \
+ __asm vpsraw ymm1, ymm1, 6 \
+ __asm vpsraw ymm2, ymm2, 6 \
+ __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \
+ __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \
+ __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
+ }
+
+// Store 16 ARGB values.
+#define STOREARGB_AVX2 \
+ __asm { \
+ __asm vpunpcklbw ymm0, ymm0, ymm1 /* BG */ \
+ __asm vpermq ymm0, ymm0, 0xd8 \
+ __asm vpunpcklbw ymm2, ymm2, ymm5 /* RA */ \
+ __asm vpermq ymm2, ymm2, 0xd8 \
+ __asm vpunpcklwd ymm1, ymm0, ymm2 /* BGRA first 8 pixels */ \
+ __asm vpunpckhwd ymm0, ymm0, ymm2 /* BGRA next 8 pixels */ \
+ __asm vmovdqu 0[edx], ymm1 \
+ __asm vmovdqu 32[edx], ymm0 \
+ __asm lea edx, [edx + 64]}
+
+// Store 16 RGBA values.
+#define STORERGBA_AVX2 \
+ __asm { \
+ __asm vpunpcklbw ymm1, ymm1, ymm2 /* GR */ \
+ __asm vpermq ymm1, ymm1, 0xd8 \
+ __asm vpunpcklbw ymm2, ymm5, ymm0 /* AB */ \
+ __asm vpermq ymm2, ymm2, 0xd8 \
+ __asm vpunpcklwd ymm0, ymm2, ymm1 /* ABGR first 8 pixels */ \
+ __asm vpunpckhwd ymm1, ymm2, ymm1 /* ABGR next 8 pixels */ \
+ __asm vmovdqu [edx], ymm0 \
+ __asm vmovdqu [edx + 32], ymm1 \
+ __asm lea edx, [edx + 64]}
+
+#ifdef HAS_I422TOARGBROW_AVX2
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+__declspec(naked) void I422ToARGBRow_AVX2(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READYUV422_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_I422TOARGBROW_AVX2
+
+#ifdef HAS_I422ALPHATOARGBROW_AVX2
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
+__declspec(naked) void I422AlphaToARGBRow_AVX2(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ push ebp
+ mov eax, [esp + 16 + 4] // Y
+ mov esi, [esp + 16 + 8] // U
+ mov edi, [esp + 16 + 12] // V
+ mov ebp, [esp + 16 + 16] // A
+ mov edx, [esp + 16 + 20] // argb
+ mov ebx, [esp + 16 + 24] // yuvconstants
+ mov ecx, [esp + 16 + 28] // width
+ sub edi, esi
+
+ convertloop:
+ READYUVA422_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebp
+ pop ebx
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_I422ALPHATOARGBROW_AVX2
+
+#ifdef HAS_I444TOARGBROW_AVX2
+// 16 pixels
+// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
+__declspec(naked) void I444ToARGBRow_AVX2(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+ convertloop:
+ READYUV444_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_I444TOARGBROW_AVX2
+
+#ifdef HAS_NV12TOARGBROW_AVX2
+// 16 pixels.
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+__declspec(naked) void NV12ToARGBRow_AVX2(
+ const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push ebx
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // UV
+ mov edx, [esp + 8 + 12] // argb
+ mov ebx, [esp + 8 + 16] // yuvconstants
+ mov ecx, [esp + 8 + 20] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READNV12_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_NV12TOARGBROW_AVX2
+
+#ifdef HAS_NV21TOARGBROW_AVX2
+// 16 pixels.
+// 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+__declspec(naked) void NV21ToARGBRow_AVX2(
+ const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push ebx
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // VU
+ mov edx, [esp + 8 + 12] // argb
+ mov ebx, [esp + 8 + 16] // yuvconstants
+ mov ecx, [esp + 8 + 20] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READNV21_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_NV21TOARGBROW_AVX2
+
+#ifdef HAS_YUY2TOARGBROW_AVX2
+// 16 pixels.
+// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
+__declspec(naked) void YUY2ToARGBRow_AVX2(
+ const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push ebx
+ mov eax, [esp + 4 + 4] // yuy2
+ mov edx, [esp + 4 + 8] // argb
+ mov ebx, [esp + 4 + 12] // yuvconstants
+ mov ecx, [esp + 4 + 16] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READYUY2_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_YUY2TOARGBROW_AVX2
+
+#ifdef HAS_UYVYTOARGBROW_AVX2
+// 16 pixels.
+// 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
+__declspec(naked) void UYVYToARGBRow_AVX2(
+ const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push ebx
+ mov eax, [esp + 4 + 4] // uyvy
+ mov edx, [esp + 4 + 8] // argb
+ mov ebx, [esp + 4 + 12] // yuvconstants
+ mov ecx, [esp + 4 + 16] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READUYVY_AVX2
+ YUVTORGB_AVX2(ebx)
+ STOREARGB_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_UYVYTOARGBROW_AVX2
+
+#ifdef HAS_I422TORGBAROW_AVX2
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
+__declspec(naked) void I422ToRGBARow_AVX2(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // abgr
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
+
+ convertloop:
+ READYUV422_AVX2
+ YUVTORGB_AVX2(ebx)
+ STORERGBA_AVX2
+
+ sub ecx, 16
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_I422TORGBAROW_AVX2
+
+#if defined(HAS_I422TOARGBROW_SSSE3)
+// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
+// Allows a conversion with half size scaling.
+
+// Read 8 UV from 444.
+#define READYUV444 \
+ __asm { \
+ __asm movq xmm0, qword ptr [esi] /* U */ \
+ __asm movq xmm1, qword ptr [esi + edi] /* V */ \
+ __asm lea esi, [esi + 8] \
+ __asm punpcklbw xmm0, xmm1 /* UV */ \
+ __asm movq xmm4, qword ptr [eax] \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8]}
+
+// Read 4 UV from 422, upsample to 8 UV.
+#define READYUV422 \
+ __asm { \
+ __asm movd xmm0, [esi] /* U */ \
+ __asm movd xmm1, [esi + edi] /* V */ \
+ __asm lea esi, [esi + 4] \
+ __asm punpcklbw xmm0, xmm1 /* UV */ \
+ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
+ __asm movq xmm4, qword ptr [eax] \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8]}
+
+// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
+#define READYUVA422 \
+ __asm { \
+ __asm movd xmm0, [esi] /* U */ \
+ __asm movd xmm1, [esi + edi] /* V */ \
+ __asm lea esi, [esi + 4] \
+ __asm punpcklbw xmm0, xmm1 /* UV */ \
+ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
+ __asm movq xmm4, qword ptr [eax] /* Y */ \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8] \
+ __asm movq xmm5, qword ptr [ebp] /* A */ \
+ __asm lea ebp, [ebp + 8]}
+
+// Read 4 UV from NV12, upsample to 8 UV.
+#define READNV12 \
+ __asm { \
+ __asm movq xmm0, qword ptr [esi] /* UV */ \
+ __asm lea esi, [esi + 8] \
+ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
+ __asm movq xmm4, qword ptr [eax] \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8]}
+
+// Read 4 VU from NV21, upsample to 8 UV.
+#define READNV21 \
+ __asm { \
+ __asm movq xmm0, qword ptr [esi] /* UV */ \
+ __asm lea esi, [esi + 8] \
+ __asm pshufb xmm0, xmmword ptr kShuffleNV21 \
+ __asm movq xmm4, qword ptr [eax] \
+ __asm punpcklbw xmm4, xmm4 \
+ __asm lea eax, [eax + 8]}
+
+// Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV.
+#define READYUY2 \
+ __asm { \
+ __asm movdqu xmm4, [eax] /* YUY2 */ \
+ __asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \
+ __asm movdqu xmm0, [eax] /* UV */ \
+ __asm pshufb xmm0, xmmword ptr kShuffleYUY2UV \
+ __asm lea eax, [eax + 16]}
+
+// Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV.
+#define READUYVY \
+ __asm { \
+ __asm movdqu xmm4, [eax] /* UYVY */ \
+ __asm pshufb xmm4, xmmword ptr kShuffleUYVYY \
+ __asm movdqu xmm0, [eax] /* UV */ \
+ __asm pshufb xmm0, xmmword ptr kShuffleUYVYUV \
+ __asm lea eax, [eax + 16]}
+
+// Convert 8 pixels: 8 UV and 8 Y.
+#define YUVTORGB(YuvConstants) \
+ __asm { \
+ __asm movdqa xmm1, xmm0 \
+ __asm movdqa xmm2, xmm0 \
+ __asm movdqa xmm3, xmm0 \
+ __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \
+ __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \
+ __asm psubw xmm0, xmm1 \
+ __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \
+ __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \
+ __asm psubw xmm1, xmm2 \
+ __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \
+ __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \
+ __asm psubw xmm2, xmm3 \
+ __asm pmulhuw xmm4, xmmword ptr [YuvConstants + KYTORGB] \
+ __asm paddsw xmm0, xmm4 /* B += Y */ \
+ __asm paddsw xmm1, xmm4 /* G += Y */ \
+ __asm paddsw xmm2, xmm4 /* R += Y */ \
+ __asm psraw xmm0, 6 \
+ __asm psraw xmm1, 6 \
+ __asm psraw xmm2, 6 \
+ __asm packuswb xmm0, xmm0 /* B */ \
+ __asm packuswb xmm1, xmm1 /* G */ \
+ __asm packuswb xmm2, xmm2 /* R */ \
+ }
+
+// Store 8 ARGB values.
+#define STOREARGB \
+ __asm { \
+ __asm punpcklbw xmm0, xmm1 /* BG */ \
+ __asm punpcklbw xmm2, xmm5 /* RA */ \
+ __asm movdqa xmm1, xmm0 \
+ __asm punpcklwd xmm0, xmm2 /* BGRA first 4 pixels */ \
+ __asm punpckhwd xmm1, xmm2 /* BGRA next 4 pixels */ \
+ __asm movdqu 0[edx], xmm0 \
+ __asm movdqu 16[edx], xmm1 \
+ __asm lea edx, [edx + 32]}
+
+// Store 8 BGRA values.
+#define STOREBGRA \
+ __asm { \
+ __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \
+ __asm punpcklbw xmm1, xmm0 /* GB */ \
+ __asm punpcklbw xmm5, xmm2 /* AR */ \
+ __asm movdqa xmm0, xmm5 \
+ __asm punpcklwd xmm5, xmm1 /* BGRA first 4 pixels */ \
+ __asm punpckhwd xmm0, xmm1 /* BGRA next 4 pixels */ \
+ __asm movdqu 0[edx], xmm5 \
+ __asm movdqu 16[edx], xmm0 \
+ __asm lea edx, [edx + 32]}
+
+// Store 8 RGBA values.
+#define STORERGBA \
+ __asm { \
+ __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \
+ __asm punpcklbw xmm1, xmm2 /* GR */ \
+ __asm punpcklbw xmm5, xmm0 /* AB */ \
+ __asm movdqa xmm0, xmm5 \
+ __asm punpcklwd xmm5, xmm1 /* RGBA first 4 pixels */ \
+ __asm punpckhwd xmm0, xmm1 /* RGBA next 4 pixels */ \
+ __asm movdqu 0[edx], xmm5 \
+ __asm movdqu 16[edx], xmm0 \
+ __asm lea edx, [edx + 32]}
+
+// Store 8 RGB24 values.
+#define STORERGB24 \
+ __asm {/* Weave into RRGB */ \
+ __asm punpcklbw xmm0, xmm1 /* BG */ \
+ __asm punpcklbw xmm2, xmm2 /* RR */ \
+ __asm movdqa xmm1, xmm0 \
+ __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \
+ __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ /* RRGB -> RGB24 */ \
+ __asm pshufb xmm0, xmm5 /* Pack first 8 and last 4 bytes. */ \
+ __asm pshufb xmm1, xmm6 /* Pack first 12 bytes. */ \
+ __asm palignr xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 xmm1 */ \
+ __asm movq qword ptr 0[edx], xmm0 /* First 8 bytes */ \
+ __asm movdqu 8[edx], xmm1 /* Last 16 bytes */ \
+ __asm lea edx, [edx + 24]}
+
+// Store 8 RGB565 values.
+#define STORERGB565 \
+ __asm {/* Weave into RRGB */ \
+ __asm punpcklbw xmm0, xmm1 /* BG */ \
+ __asm punpcklbw xmm2, xmm2 /* RR */ \
+ __asm movdqa xmm1, xmm0 \
+ __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \
+ __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ /* RRGB -> RGB565 */ \
+ __asm movdqa xmm3, xmm0 /* B first 4 pixels of argb */ \
+ __asm movdqa xmm2, xmm0 /* G */ \
+ __asm pslld xmm0, 8 /* R */ \
+ __asm psrld xmm3, 3 /* B */ \
+ __asm psrld xmm2, 5 /* G */ \
+ __asm psrad xmm0, 16 /* R */ \
+ __asm pand xmm3, xmm5 /* B */ \
+ __asm pand xmm2, xmm6 /* G */ \
+ __asm pand xmm0, xmm7 /* R */ \
+ __asm por xmm3, xmm2 /* BG */ \
+ __asm por xmm0, xmm3 /* BGR */ \
+ __asm movdqa xmm3, xmm1 /* B next 4 pixels of argb */ \
+ __asm movdqa xmm2, xmm1 /* G */ \
+ __asm pslld xmm1, 8 /* R */ \
+ __asm psrld xmm3, 3 /* B */ \
+ __asm psrld xmm2, 5 /* G */ \
+ __asm psrad xmm1, 16 /* R */ \
+ __asm pand xmm3, xmm5 /* B */ \
+ __asm pand xmm2, xmm6 /* G */ \
+ __asm pand xmm1, xmm7 /* R */ \
+ __asm por xmm3, xmm2 /* BG */ \
+ __asm por xmm1, xmm3 /* BGR */ \
+ __asm packssdw xmm0, xmm1 \
+ __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \
+ __asm lea edx, [edx + 16]}
+
+// 8 pixels.
+// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) void I444ToARGBRow_SSSE3(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
+
+ convertloop:
+ READYUV444
+ YUVTORGB(ebx)
+ STOREARGB
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+// 8 pixels.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes).
+__declspec(naked) void I422ToRGB24Row_SSSE3(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
+ movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
+
+ convertloop:
+ READYUV422
+ YUVTORGB(ebx)
+ STORERGB24
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+// 8 pixels
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
+__declspec(naked) void I422ToRGB565Row_SSSE3(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* rgb565_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
+ psrld xmm5, 27
+ pcmpeqb xmm6, xmm6 // generate mask 0x000007e0
+ psrld xmm6, 26
+ pslld xmm6, 5
+ pcmpeqb xmm7, xmm7 // generate mask 0xfffff800
+ pslld xmm7, 11
+
+ convertloop:
+ READYUV422
+ YUVTORGB(ebx)
+ STORERGB565
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+// 8 pixels.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) void I422ToARGBRow_SSSE3(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
+
+ convertloop:
+ READYUV422
+ YUVTORGB(ebx)
+ STOREARGB
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+// 8 pixels.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB.
+__declspec(naked) void I422AlphaToARGBRow_SSSE3(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ push ebp
+ mov eax, [esp + 16 + 4] // Y
+ mov esi, [esp + 16 + 8] // U
+ mov edi, [esp + 16 + 12] // V
+ mov ebp, [esp + 16 + 16] // A
+ mov edx, [esp + 16 + 20] // argb
+ mov ebx, [esp + 16 + 24] // yuvconstants
+ mov ecx, [esp + 16 + 28] // width
+ sub edi, esi
+
+ convertloop:
+ READYUVA422
+ YUVTORGB(ebx)
+ STOREARGB
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebp
+ pop ebx
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+// 8 pixels.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) void NV12ToARGBRow_SSSE3(
+ const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push ebx
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // UV
+ mov edx, [esp + 8 + 12] // argb
+ mov ebx, [esp + 8 + 16] // yuvconstants
+ mov ecx, [esp + 8 + 20] // width
+ pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
+
+ convertloop:
+ READNV12
+ YUVTORGB(ebx)
+ STOREARGB
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ pop esi
+ ret
+ }
+}
+
+// 8 pixels.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) void NV21ToARGBRow_SSSE3(
+ const uint8_t* y_buf,
+ const uint8_t* vu_buf,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push ebx
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // VU
+ mov edx, [esp + 8 + 12] // argb
+ mov ebx, [esp + 8 + 16] // yuvconstants
+ mov ecx, [esp + 8 + 20] // width
+ pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
+
+ convertloop:
+ READNV21
+ YUVTORGB(ebx)
+ STOREARGB
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ pop esi
+ ret
+ }
+}
+
+// 8 pixels.
+// 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
+__declspec(naked) void YUY2ToARGBRow_SSSE3(
+ const uint8_t* src_yuy2,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push ebx
+ mov eax, [esp + 4 + 4] // yuy2
+ mov edx, [esp + 4 + 8] // argb
+ mov ebx, [esp + 4 + 12] // yuvconstants
+ mov ecx, [esp + 4 + 16] // width
+ pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
+
+ convertloop:
+ READYUY2
+ YUVTORGB(ebx)
+ STOREARGB
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ ret
+ }
+}
+
+// 8 pixels.
+// 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
+__declspec(naked) void UYVYToARGBRow_SSSE3(
+ const uint8_t* src_uyvy,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push ebx
+ mov eax, [esp + 4 + 4] // uyvy
+ mov edx, [esp + 4 + 8] // argb
+ mov ebx, [esp + 4 + 12] // yuvconstants
+ mov ecx, [esp + 4 + 16] // width
+ pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
+
+ convertloop:
+ READUYVY
+ YUVTORGB(ebx)
+ STOREARGB
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ ret
+ }
+}
+
+__declspec(naked) void I422ToRGBARow_SSSE3(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+
+ convertloop:
+ READYUV422
+ YUVTORGB(ebx)
+ STORERGBA
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ ret
+ }
+}
+#endif // HAS_I422TOARGBROW_SSSE3
+
+#ifdef HAS_I400TOARGBROW_SSE2
+// 8 pixels of Y converted to 8 pixels of ARGB (32 bytes).
+__declspec(naked) void I400ToARGBRow_SSE2(const uint8_t* y_buf,
+ uint8_t* rgb_buf,
+ int width) {
+ __asm {
+ mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
+ movd xmm2, eax
+ pshufd xmm2, xmm2,0
+ mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16)
+ movd xmm3, eax
+ pshufd xmm3, xmm3, 0
+ pcmpeqb xmm4, xmm4 // generate mask 0xff000000
+ pslld xmm4, 24
+
+ mov eax, [esp + 4] // Y
+ mov edx, [esp + 8] // rgb
+ mov ecx, [esp + 12] // width
+
+ convertloop:
+ // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
+ movq xmm0, qword ptr [eax]
+ lea eax, [eax + 8]
+ punpcklbw xmm0, xmm0 // Y.Y
+ pmulhuw xmm0, xmm2
+ psubusw xmm0, xmm3
+ psrlw xmm0, 6
+ packuswb xmm0, xmm0 // G
+
+ // Step 2: Weave into ARGB
+ punpcklbw xmm0, xmm0 // GG
+ movdqa xmm1, xmm0
+ punpcklwd xmm0, xmm0 // BGRA first 4 pixels
+ punpckhwd xmm1, xmm1 // BGRA next 4 pixels
+ por xmm0, xmm4
+ por xmm1, xmm4
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg convertloop
+ ret
+ }
+}
+#endif // HAS_I400TOARGBROW_SSE2
+
+#ifdef HAS_I400TOARGBROW_AVX2
+// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
+// note: vpunpcklbw mutates and vpackuswb unmutates.
+__declspec(naked) void I400ToARGBRow_AVX2(const uint8_t* y_buf,
+ uint8_t* rgb_buf,
+ int width) {
+ __asm {
+ mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
+ vmovd xmm2, eax
+ vbroadcastss ymm2, xmm2
+ mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16)
+ vmovd xmm3, eax
+ vbroadcastss ymm3, xmm3
+ vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xff000000
+ vpslld ymm4, ymm4, 24
+
+ mov eax, [esp + 4] // Y
+ mov edx, [esp + 8] // rgb
+ mov ecx, [esp + 12] // width
+
+ convertloop:
+ // Step 1: Scale Y contriportbution to 16 G values. G = (y - 16) * 1.164
+ vmovdqu xmm0, [eax]
+ lea eax, [eax + 16]
+ vpermq ymm0, ymm0, 0xd8 // vpunpcklbw mutates
+ vpunpcklbw ymm0, ymm0, ymm0 // Y.Y
+ vpmulhuw ymm0, ymm0, ymm2
+ vpsubusw ymm0, ymm0, ymm3
+ vpsrlw ymm0, ymm0, 6
+ vpackuswb ymm0, ymm0, ymm0 // G. still mutated: 3120
+
+ // TODO(fbarchard): Weave alpha with unpack.
+ // Step 2: Weave into ARGB
+ vpunpcklbw ymm1, ymm0, ymm0 // GG - mutates
+ vpermq ymm1, ymm1, 0xd8
+ vpunpcklwd ymm0, ymm1, ymm1 // GGGG first 8 pixels
+ vpunpckhwd ymm1, ymm1, ymm1 // GGGG next 8 pixels
+ vpor ymm0, ymm0, ymm4
+ vpor ymm1, ymm1, ymm4
+ vmovdqu [edx], ymm0
+ vmovdqu [edx + 32], ymm1
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_I400TOARGBROW_AVX2
+
+#ifdef HAS_MIRRORROW_SSSE3
+// Shuffle table for reversing the bytes.
+static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
+ 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
+
+// TODO(fbarchard): Replace lea with -16 offset.
+__declspec(naked) void MirrorRow_SSSE3(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+ movdqa xmm5, xmmword ptr kShuffleMirror
+
+ convertloop:
+ movdqu xmm0, [eax - 16 + ecx]
+ pshufb xmm0, xmm5
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+#endif // HAS_MIRRORROW_SSSE3
+
+#ifdef HAS_MIRRORROW_AVX2
+__declspec(naked) void MirrorRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+ vbroadcastf128 ymm5, xmmword ptr kShuffleMirror
+
+ convertloop:
+ vmovdqu ymm0, [eax - 32 + ecx]
+ vpshufb ymm0, ymm0, ymm5
+ vpermq ymm0, ymm0, 0x4e // swap high and low halfs
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_MIRRORROW_AVX2
+
+#ifdef HAS_MIRRORUVROW_SSSE3
+// Shuffle table for reversing the bytes of UV channels.
+static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u,
+ 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
+
+__declspec(naked) void MirrorUVRow_SSSE3(const uint8_t* src,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push edi
+ mov eax, [esp + 4 + 4] // src
+ mov edx, [esp + 4 + 8] // dst_u
+ mov edi, [esp + 4 + 12] // dst_v
+ mov ecx, [esp + 4 + 16] // width
+ movdqa xmm1, xmmword ptr kShuffleMirrorUV
+ lea eax, [eax + ecx * 2 - 16]
+ sub edi, edx
+
+ convertloop:
+ movdqu xmm0, [eax]
+ lea eax, [eax - 16]
+ pshufb xmm0, xmm1
+ movlpd qword ptr [edx], xmm0
+ movhpd qword ptr [edx + edi], xmm0
+ lea edx, [edx + 8]
+ sub ecx, 8
+ jg convertloop
+
+ pop edi
+ ret
+ }
+}
+#endif // HAS_MIRRORUVROW_SSSE3
+
+#ifdef HAS_ARGBMIRRORROW_SSE2
+__declspec(naked) void ARGBMirrorRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+ lea eax, [eax - 16 + ecx * 4] // last 4 pixels.
+
+ convertloop:
+ movdqu xmm0, [eax]
+ lea eax, [eax - 16]
+ pshufd xmm0, xmm0, 0x1b
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg convertloop
+ ret
+ }
+}
+#endif // HAS_ARGBMIRRORROW_SSE2
+
+#ifdef HAS_ARGBMIRRORROW_AVX2
+// Shuffle table for reversing the bytes.
+static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
+
+__declspec(naked) void ARGBMirrorRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+ vmovdqu ymm5, ymmword ptr kARGBShuffleMirror_AVX2
+
+ convertloop:
+ vpermd ymm0, ymm5, [eax - 32 + ecx * 4] // permute dword order
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBMIRRORROW_AVX2
+
+#ifdef HAS_SPLITUVROW_SSE2
+__declspec(naked) void SplitUVRow_SSE2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push edi
+ mov eax, [esp + 4 + 4] // src_uv
+ mov edx, [esp + 4 + 8] // dst_u
+ mov edi, [esp + 4 + 12] // dst_v
+ mov ecx, [esp + 4 + 16] // width
+ pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
+ psrlw xmm5, 8
+ sub edi, edx
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+ pand xmm0, xmm5 // even bytes
+ pand xmm1, xmm5
+ packuswb xmm0, xmm1
+ psrlw xmm2, 8 // odd bytes
+ psrlw xmm3, 8
+ packuswb xmm2, xmm3
+ movdqu [edx], xmm0
+ movdqu [edx + edi], xmm2
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ ret
+ }
+}
+
+#endif // HAS_SPLITUVROW_SSE2
+
+#ifdef HAS_SPLITUVROW_AVX2
+__declspec(naked) void SplitUVRow_AVX2(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push edi
+ mov eax, [esp + 4 + 4] // src_uv
+ mov edx, [esp + 4 + 8] // dst_u
+ mov edi, [esp + 4 + 12] // dst_v
+ mov ecx, [esp + 4 + 16] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
+ vpsrlw ymm5, ymm5, 8
+ sub edi, edx
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpsrlw ymm2, ymm0, 8 // odd bytes
+ vpsrlw ymm3, ymm1, 8
+ vpand ymm0, ymm0, ymm5 // even bytes
+ vpand ymm1, ymm1, ymm5
+ vpackuswb ymm0, ymm0, ymm1
+ vpackuswb ymm2, ymm2, ymm3
+ vpermq ymm0, ymm0, 0xd8
+ vpermq ymm2, ymm2, 0xd8
+ vmovdqu [edx], ymm0
+ vmovdqu [edx + edi], ymm2
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg convertloop
+
+ pop edi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_SPLITUVROW_AVX2
+
+#ifdef HAS_MERGEUVROW_SSE2
+__declspec(naked) void MergeUVRow_SSE2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ __asm {
+ push edi
+ mov eax, [esp + 4 + 4] // src_u
+ mov edx, [esp + 4 + 8] // src_v
+ mov edi, [esp + 4 + 12] // dst_uv
+ mov ecx, [esp + 4 + 16] // width
+ sub edx, eax
+
+ convertloop:
+ movdqu xmm0, [eax] // read 16 U's
+ movdqu xmm1, [eax + edx] // and 16 V's
+ lea eax, [eax + 16]
+ movdqa xmm2, xmm0
+ punpcklbw xmm0, xmm1 // first 8 UV pairs
+ punpckhbw xmm2, xmm1 // next 8 UV pairs
+ movdqu [edi], xmm0
+ movdqu [edi + 16], xmm2
+ lea edi, [edi + 32]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ ret
+ }
+}
+#endif // HAS_MERGEUVROW_SSE2
+
+#ifdef HAS_MERGEUVROW_AVX2
+__declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ __asm {
+ push edi
+ mov eax, [esp + 4 + 4] // src_u
+ mov edx, [esp + 4 + 8] // src_v
+ mov edi, [esp + 4 + 12] // dst_uv
+ mov ecx, [esp + 4 + 16] // width
+ sub edx, eax
+
+ convertloop:
+ vmovdqu ymm0, [eax] // read 32 U's
+ vmovdqu ymm1, [eax + edx] // and 32 V's
+ lea eax, [eax + 32]
+ vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2
+ vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3
+ vextractf128 [edi], ymm2, 0 // bytes 0..15
+ vextractf128 [edi + 16], ymm0, 0 // bytes 16..31
+ vextractf128 [edi + 32], ymm2, 1 // bytes 32..47
+ vextractf128 [edi + 48], ymm0, 1 // bytes 47..63
+ lea edi, [edi + 64]
+ sub ecx, 32
+ jg convertloop
+
+ pop edi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_MERGEUVROW_AVX2
+
+#ifdef HAS_COPYROW_SSE2
+// CopyRow copys 'width' bytes using a 16 byte load/store, 32 bytes at time.
+__declspec(naked) void CopyRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+ test eax, 15
+ jne convertloopu
+ test edx, 15
+ jne convertloopu
+
+ convertloopa:
+ movdqa xmm0, [eax]
+ movdqa xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ movdqa [edx], xmm0
+ movdqa [edx + 16], xmm1
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg convertloopa
+ ret
+
+ convertloopu:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg convertloopu
+ ret
+ }
+}
+#endif // HAS_COPYROW_SSE2
+
+#ifdef HAS_COPYROW_AVX
+// CopyRow copys 'width' bytes using a 32 byte load/store, 64 bytes at time.
+__declspec(naked) void CopyRow_AVX(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vmovdqu [edx], ymm0
+ vmovdqu [edx + 32], ymm1
+ lea edx, [edx + 64]
+ sub ecx, 64
+ jg convertloop
+
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_COPYROW_AVX
+
+// Multiple of 1.
+__declspec(naked) void CopyRow_ERMS(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, esi
+ mov edx, edi
+ mov esi, [esp + 4] // src
+ mov edi, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+ rep movsb
+ mov edi, edx
+ mov esi, eax
+ ret
+ }
+}
+
+#ifdef HAS_ARGBCOPYALPHAROW_SSE2
+// width in pixels
+__declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+ pcmpeqb xmm0, xmm0 // generate mask 0xff000000
+ pslld xmm0, 24
+ pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
+ psrld xmm1, 8
+
+ convertloop:
+ movdqu xmm2, [eax]
+ movdqu xmm3, [eax + 16]
+ lea eax, [eax + 32]
+ movdqu xmm4, [edx]
+ movdqu xmm5, [edx + 16]
+ pand xmm2, xmm0
+ pand xmm3, xmm0
+ pand xmm4, xmm1
+ pand xmm5, xmm1
+ por xmm2, xmm4
+ por xmm3, xmm5
+ movdqu [edx], xmm2
+ movdqu [edx + 16], xmm3
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg convertloop
+
+ ret
+ }
+}
+#endif // HAS_ARGBCOPYALPHAROW_SSE2
+
+#ifdef HAS_ARGBCOPYALPHAROW_AVX2
+// width in pixels
+__declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm0, ymm0, ymm0
+ vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff
+
+ convertloop:
+ vmovdqu ymm1, [eax]
+ vmovdqu ymm2, [eax + 32]
+ lea eax, [eax + 64]
+ vpblendvb ymm1, ymm1, [edx], ymm0
+ vpblendvb ymm2, ymm2, [edx + 32], ymm0
+ vmovdqu [edx], ymm1
+ vmovdqu [edx + 32], ymm2
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBCOPYALPHAROW_AVX2
+
+#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
+// width in pixels
+__declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_a
+ mov ecx, [esp + 12] // width
+
+ extractloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ psrld xmm0, 24
+ psrld xmm1, 24
+ packssdw xmm0, xmm1
+ packuswb xmm0, xmm0
+ movq qword ptr [edx], xmm0
+ lea edx, [edx + 8]
+ sub ecx, 8
+ jg extractloop
+
+ ret
+ }
+}
+#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
+
+#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
+// width in pixels
+__declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_a
+ mov ecx, [esp + 12] // width
+ vmovdqa ymm4, ymmword ptr kPermdARGBToY_AVX
+
+ extractloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ vpsrld ymm0, ymm0, 24
+ vpsrld ymm1, ymm1, 24
+ vmovdqu ymm2, [eax + 64]
+ vmovdqu ymm3, [eax + 96]
+ lea eax, [eax + 128]
+ vpackssdw ymm0, ymm0, ymm1 // mutates
+ vpsrld ymm2, ymm2, 24
+ vpsrld ymm3, ymm3, 24
+ vpackssdw ymm2, ymm2, ymm3 // mutates
+ vpackuswb ymm0, ymm0, ymm2 // mutates
+ vpermd ymm0, ymm4, ymm0 // unmutate
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg extractloop
+
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBEXTRACTALPHAROW_AVX2
+
+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
+// width in pixels
+__declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+ pcmpeqb xmm0, xmm0 // generate mask 0xff000000
+ pslld xmm0, 24
+ pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
+ psrld xmm1, 8
+
+ convertloop:
+ movq xmm2, qword ptr [eax] // 8 Y's
+ lea eax, [eax + 8]
+ punpcklbw xmm2, xmm2
+ punpckhwd xmm3, xmm2
+ punpcklwd xmm2, xmm2
+ movdqu xmm4, [edx]
+ movdqu xmm5, [edx + 16]
+ pand xmm2, xmm0
+ pand xmm3, xmm0
+ pand xmm4, xmm1
+ pand xmm5, xmm1
+ por xmm2, xmm4
+ por xmm3, xmm5
+ movdqu [edx], xmm2
+ movdqu [edx + 16], xmm3
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg convertloop
+
+ ret
+ }
+}
+#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2
+
+#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
+// width in pixels
+__declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src
+ mov edx, [esp + 8] // dst
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm0, ymm0, ymm0
+ vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff
+
+ convertloop:
+ vpmovzxbd ymm1, qword ptr [eax]
+ vpmovzxbd ymm2, qword ptr [eax + 8]
+ lea eax, [eax + 16]
+ vpslld ymm1, ymm1, 24
+ vpslld ymm2, ymm2, 24
+ vpblendvb ymm1, ymm1, [edx], ymm0
+ vpblendvb ymm2, ymm2, [edx + 32], ymm0
+ vmovdqu [edx], ymm1
+ vmovdqu [edx + 32], ymm2
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2
+
+#ifdef HAS_SETROW_X86
+// Write 'width' bytes using an 8 bit value repeated.
+// width should be multiple of 4.
+__declspec(naked) void SetRow_X86(uint8_t* dst, uint8_t v8, int width) {
+ __asm {
+ movzx eax, byte ptr [esp + 8] // v8
+ mov edx, 0x01010101 // Duplicate byte to all bytes.
+ mul edx // overwrites edx with upper part of result.
+ mov edx, edi
+ mov edi, [esp + 4] // dst
+ mov ecx, [esp + 12] // width
+ shr ecx, 2
+ rep stosd
+ mov edi, edx
+ ret
+ }
+}
+
+// Write 'width' bytes using an 8 bit value repeated.
+__declspec(naked) void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) {
+ __asm {
+ mov edx, edi
+ mov edi, [esp + 4] // dst
+ mov eax, [esp + 8] // v8
+ mov ecx, [esp + 12] // width
+ rep stosb
+ mov edi, edx
+ ret
+ }
+}
+
+// Write 'width' 32 bit values.
+__declspec(naked) void ARGBSetRow_X86(uint8_t* dst_argb,
+ uint32_t v32,
+ int width) {
+ __asm {
+ mov edx, edi
+ mov edi, [esp + 4] // dst
+ mov eax, [esp + 8] // v32
+ mov ecx, [esp + 12] // width
+ rep stosd
+ mov edi, edx
+ ret
+ }
+}
+#endif // HAS_SETROW_X86
+
+#ifdef HAS_YUY2TOYROW_AVX2
+__declspec(naked) void YUY2ToYRow_AVX2(const uint8_t* src_yuy2,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_yuy2
+ mov edx, [esp + 8] // dst_y
+ mov ecx, [esp + 12] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
+ vpsrlw ymm5, ymm5, 8
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpand ymm0, ymm0, ymm5 // even bytes are Y
+ vpand ymm1, ymm1, ymm5
+ vpackuswb ymm0, ymm0, ymm1 // mutates.
+ vpermq ymm0, ymm0, 0xd8
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+
+__declspec(naked) void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_yuy2
+ mov esi, [esp + 8 + 8] // stride_yuy2
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
+ vpsrlw ymm5, ymm5, 8
+ sub edi, edx
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ vpavgb ymm0, ymm0, [eax + esi]
+ vpavgb ymm1, ymm1, [eax + esi + 32]
+ lea eax, [eax + 64]
+ vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV
+ vpsrlw ymm1, ymm1, 8
+ vpackuswb ymm0, ymm0, ymm1 // mutates.
+ vpermq ymm0, ymm0, 0xd8
+ vpand ymm1, ymm0, ymm5 // U
+ vpsrlw ymm0, ymm0, 8 // V
+ vpackuswb ymm1, ymm1, ymm1 // mutates.
+ vpackuswb ymm0, ymm0, ymm0 // mutates.
+ vpermq ymm1, ymm1, 0xd8
+ vpermq ymm0, ymm0, 0xd8
+ vextractf128 [edx], ymm1, 0 // U
+ vextractf128 [edx + edi], ymm0, 0 // V
+ lea edx, [edx + 16]
+ sub ecx, 32
+ jg convertloop
+
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+
+__declspec(naked) void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push edi
+ mov eax, [esp + 4 + 4] // src_yuy2
+ mov edx, [esp + 4 + 8] // dst_u
+ mov edi, [esp + 4 + 12] // dst_v
+ mov ecx, [esp + 4 + 16] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
+ vpsrlw ymm5, ymm5, 8
+ sub edi, edx
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV
+ vpsrlw ymm1, ymm1, 8
+ vpackuswb ymm0, ymm0, ymm1 // mutates.
+ vpermq ymm0, ymm0, 0xd8
+ vpand ymm1, ymm0, ymm5 // U
+ vpsrlw ymm0, ymm0, 8 // V
+ vpackuswb ymm1, ymm1, ymm1 // mutates.
+ vpackuswb ymm0, ymm0, ymm0 // mutates.
+ vpermq ymm1, ymm1, 0xd8
+ vpermq ymm0, ymm0, 0xd8
+ vextractf128 [edx], ymm1, 0 // U
+ vextractf128 [edx + edi], ymm0, 0 // V
+ lea edx, [edx + 16]
+ sub ecx, 32
+ jg convertloop
+
+ pop edi
+ vzeroupper
+ ret
+ }
+}
+
+__declspec(naked) void UYVYToYRow_AVX2(const uint8_t* src_uyvy,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_uyvy
+ mov edx, [esp + 8] // dst_y
+ mov ecx, [esp + 12] // width
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpsrlw ymm0, ymm0, 8 // odd bytes are Y
+ vpsrlw ymm1, ymm1, 8
+ vpackuswb ymm0, ymm0, ymm1 // mutates.
+ vpermq ymm0, ymm0, 0xd8
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+
+__declspec(naked) void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
+ int stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_yuy2
+ mov esi, [esp + 8 + 8] // stride_yuy2
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
+ vpsrlw ymm5, ymm5, 8
+ sub edi, edx
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ vpavgb ymm0, ymm0, [eax + esi]
+ vpavgb ymm1, ymm1, [eax + esi + 32]
+ lea eax, [eax + 64]
+ vpand ymm0, ymm0, ymm5 // UYVY -> UVUV
+ vpand ymm1, ymm1, ymm5
+ vpackuswb ymm0, ymm0, ymm1 // mutates.
+ vpermq ymm0, ymm0, 0xd8
+ vpand ymm1, ymm0, ymm5 // U
+ vpsrlw ymm0, ymm0, 8 // V
+ vpackuswb ymm1, ymm1, ymm1 // mutates.
+ vpackuswb ymm0, ymm0, ymm0 // mutates.
+ vpermq ymm1, ymm1, 0xd8
+ vpermq ymm0, ymm0, 0xd8
+ vextractf128 [edx], ymm1, 0 // U
+ vextractf128 [edx + edi], ymm0, 0 // V
+ lea edx, [edx + 16]
+ sub ecx, 32
+ jg convertloop
+
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+
+__declspec(naked) void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push edi
+ mov eax, [esp + 4 + 4] // src_yuy2
+ mov edx, [esp + 4 + 8] // dst_u
+ mov edi, [esp + 4 + 12] // dst_v
+ mov ecx, [esp + 4 + 16] // width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
+ vpsrlw ymm5, ymm5, 8
+ sub edi, edx
+
+ convertloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpand ymm0, ymm0, ymm5 // UYVY -> UVUV
+ vpand ymm1, ymm1, ymm5
+ vpackuswb ymm0, ymm0, ymm1 // mutates.
+ vpermq ymm0, ymm0, 0xd8
+ vpand ymm1, ymm0, ymm5 // U
+ vpsrlw ymm0, ymm0, 8 // V
+ vpackuswb ymm1, ymm1, ymm1 // mutates.
+ vpackuswb ymm0, ymm0, ymm0 // mutates.
+ vpermq ymm1, ymm1, 0xd8
+ vpermq ymm0, ymm0, 0xd8
+ vextractf128 [edx], ymm1, 0 // U
+ vextractf128 [edx + edi], ymm0, 0 // V
+ lea edx, [edx + 16]
+ sub ecx, 32
+ jg convertloop
+
+ pop edi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_YUY2TOYROW_AVX2
+
+#ifdef HAS_YUY2TOYROW_SSE2
+__declspec(naked) void YUY2ToYRow_SSE2(const uint8_t* src_yuy2,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_yuy2
+ mov edx, [esp + 8] // dst_y
+ mov ecx, [esp + 12] // width
+ pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
+ psrlw xmm5, 8
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ pand xmm0, xmm5 // even bytes are Y
+ pand xmm1, xmm5
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_yuy2
+ mov esi, [esp + 8 + 8] // stride_yuy2
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
+ psrlw xmm5, 8
+ sub edi, edx
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + esi]
+ movdqu xmm3, [eax + esi + 16]
+ lea eax, [eax + 32]
+ pavgb xmm0, xmm2
+ pavgb xmm1, xmm3
+ psrlw xmm0, 8 // YUYV -> UVUV
+ psrlw xmm1, 8
+ packuswb xmm0, xmm1
+ movdqa xmm1, xmm0
+ pand xmm0, xmm5 // U
+ packuswb xmm0, xmm0
+ psrlw xmm1, 8 // V
+ packuswb xmm1, xmm1
+ movq qword ptr [edx], xmm0
+ movq qword ptr [edx + edi], xmm1
+ lea edx, [edx + 8]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+__declspec(naked) void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push edi
+ mov eax, [esp + 4 + 4] // src_yuy2
+ mov edx, [esp + 4 + 8] // dst_u
+ mov edi, [esp + 4 + 12] // dst_v
+ mov ecx, [esp + 4 + 16] // width
+ pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
+ psrlw xmm5, 8
+ sub edi, edx
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ psrlw xmm0, 8 // YUYV -> UVUV
+ psrlw xmm1, 8
+ packuswb xmm0, xmm1
+ movdqa xmm1, xmm0
+ pand xmm0, xmm5 // U
+ packuswb xmm0, xmm0
+ psrlw xmm1, 8 // V
+ packuswb xmm1, xmm1
+ movq qword ptr [edx], xmm0
+ movq qword ptr [edx + edi], xmm1
+ lea edx, [edx + 8]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ ret
+ }
+}
+
+__declspec(naked) void UYVYToYRow_SSE2(const uint8_t* src_uyvy,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_uyvy
+ mov edx, [esp + 8] // dst_y
+ mov ecx, [esp + 12] // width
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ psrlw xmm0, 8 // odd bytes are Y
+ psrlw xmm1, 8
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+ ret
+ }
+}
+
+__declspec(naked) void UYVYToUVRow_SSE2(const uint8_t* src_uyvy,
+ int stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_yuy2
+ mov esi, [esp + 8 + 8] // stride_yuy2
+ mov edx, [esp + 8 + 12] // dst_u
+ mov edi, [esp + 8 + 16] // dst_v
+ mov ecx, [esp + 8 + 20] // width
+ pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
+ psrlw xmm5, 8
+ sub edi, edx
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + esi]
+ movdqu xmm3, [eax + esi + 16]
+ lea eax, [eax + 32]
+ pavgb xmm0, xmm2
+ pavgb xmm1, xmm3
+ pand xmm0, xmm5 // UYVY -> UVUV
+ pand xmm1, xmm5
+ packuswb xmm0, xmm1
+ movdqa xmm1, xmm0
+ pand xmm0, xmm5 // U
+ packuswb xmm0, xmm0
+ psrlw xmm1, 8 // V
+ packuswb xmm1, xmm1
+ movq qword ptr [edx], xmm0
+ movq qword ptr [edx + edi], xmm1
+ lea edx, [edx + 8]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+__declspec(naked) void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ __asm {
+ push edi
+ mov eax, [esp + 4 + 4] // src_yuy2
+ mov edx, [esp + 4 + 8] // dst_u
+ mov edi, [esp + 4 + 12] // dst_v
+ mov ecx, [esp + 4 + 16] // width
+ pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
+ psrlw xmm5, 8
+ sub edi, edx
+
+ convertloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ pand xmm0, xmm5 // UYVY -> UVUV
+ pand xmm1, xmm5
+ packuswb xmm0, xmm1
+ movdqa xmm1, xmm0
+ pand xmm0, xmm5 // U
+ packuswb xmm0, xmm0
+ psrlw xmm1, 8 // V
+ packuswb xmm1, xmm1
+ movq qword ptr [edx], xmm0
+ movq qword ptr [edx + edi], xmm1
+ lea edx, [edx + 8]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ ret
+ }
+}
+#endif // HAS_YUY2TOYROW_SSE2
+
+#ifdef HAS_BLENDPLANEROW_SSSE3
+// Blend 8 pixels at a time.
+// unsigned version of math
+// =((A2*C2)+(B2*(255-C2))+255)/256
+// signed version of math
+// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
+__declspec(naked) void BlendPlaneRow_SSSE3(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00
+ psllw xmm5, 8
+ mov eax, 0x80808080 // 128 for biasing image to signed.
+ movd xmm6, eax
+ pshufd xmm6, xmm6, 0x00
+
+ mov eax, 0x807f807f // 32768 + 127 for unbias and round.
+ movd xmm7, eax
+ pshufd xmm7, xmm7, 0x00
+ mov eax, [esp + 8 + 4] // src0
+ mov edx, [esp + 8 + 8] // src1
+ mov esi, [esp + 8 + 12] // alpha
+ mov edi, [esp + 8 + 16] // dst
+ mov ecx, [esp + 8 + 20] // width
+ sub eax, esi
+ sub edx, esi
+ sub edi, esi
+
+ // 8 pixel loop.
+ convertloop8:
+ movq xmm0, qword ptr [esi] // alpha
+ punpcklbw xmm0, xmm0
+ pxor xmm0, xmm5 // a, 255-a
+ movq xmm1, qword ptr [eax + esi] // src0
+ movq xmm2, qword ptr [edx + esi] // src1
+ punpcklbw xmm1, xmm2
+ psubb xmm1, xmm6 // bias src0/1 - 128
+ pmaddubsw xmm0, xmm1
+ paddw xmm0, xmm7 // unbias result - 32768 and round.
+ psrlw xmm0, 8
+ packuswb xmm0, xmm0
+ movq qword ptr [edi + esi], xmm0
+ lea esi, [esi + 8]
+ sub ecx, 8
+ jg convertloop8
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+#endif // HAS_BLENDPLANEROW_SSSE3
+
+#ifdef HAS_BLENDPLANEROW_AVX2
+// Blend 32 pixels at a time.
+// unsigned version of math
+// =((A2*C2)+(B2*(255-C2))+255)/256
+// signed version of math
+// =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256
+__declspec(naked) void BlendPlaneRow_AVX2(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff00ff00
+ vpsllw ymm5, ymm5, 8
+ mov eax, 0x80808080 // 128 for biasing image to signed.
+ vmovd xmm6, eax
+ vbroadcastss ymm6, xmm6
+ mov eax, 0x807f807f // 32768 + 127 for unbias and round.
+ vmovd xmm7, eax
+ vbroadcastss ymm7, xmm7
+ mov eax, [esp + 8 + 4] // src0
+ mov edx, [esp + 8 + 8] // src1
+ mov esi, [esp + 8 + 12] // alpha
+ mov edi, [esp + 8 + 16] // dst
+ mov ecx, [esp + 8 + 20] // width
+ sub eax, esi
+ sub edx, esi
+ sub edi, esi
+
+ // 32 pixel loop.
+ convertloop32:
+ vmovdqu ymm0, [esi] // alpha
+ vpunpckhbw ymm3, ymm0, ymm0 // 8..15, 24..31
+ vpunpcklbw ymm0, ymm0, ymm0 // 0..7, 16..23
+ vpxor ymm3, ymm3, ymm5 // a, 255-a
+ vpxor ymm0, ymm0, ymm5 // a, 255-a
+ vmovdqu ymm1, [eax + esi] // src0
+ vmovdqu ymm2, [edx + esi] // src1
+ vpunpckhbw ymm4, ymm1, ymm2
+ vpunpcklbw ymm1, ymm1, ymm2
+ vpsubb ymm4, ymm4, ymm6 // bias src0/1 - 128
+ vpsubb ymm1, ymm1, ymm6 // bias src0/1 - 128
+ vpmaddubsw ymm3, ymm3, ymm4
+ vpmaddubsw ymm0, ymm0, ymm1
+ vpaddw ymm3, ymm3, ymm7 // unbias result - 32768 and round.
+ vpaddw ymm0, ymm0, ymm7 // unbias result - 32768 and round.
+ vpsrlw ymm3, ymm3, 8
+ vpsrlw ymm0, ymm0, 8
+ vpackuswb ymm0, ymm0, ymm3
+ vmovdqu [edi + esi], ymm0
+ lea esi, [esi + 32]
+ sub ecx, 32
+ jg convertloop32
+
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_BLENDPLANEROW_AVX2
+
+#ifdef HAS_ARGBBLENDROW_SSSE3
+// Shuffle table for isolating alpha.
+static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
+ 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80};
+
+// Blend 8 pixels at a time.
+__declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_argb0
+ mov esi, [esp + 4 + 8] // src_argb1
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // width
+ pcmpeqb xmm7, xmm7 // generate constant 0x0001
+ psrlw xmm7, 15
+ pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff
+ psrlw xmm6, 8
+ pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00
+ psllw xmm5, 8
+ pcmpeqb xmm4, xmm4 // generate mask 0xff000000
+ pslld xmm4, 24
+ sub ecx, 4
+ jl convertloop4b // less than 4 pixels?
+
+ // 4 pixel loop.
+ convertloop4:
+ movdqu xmm3, [eax] // src argb
+ lea eax, [eax + 16]
+ movdqa xmm0, xmm3 // src argb
+ pxor xmm3, xmm4 // ~alpha
+ movdqu xmm2, [esi] // _r_b
+ pshufb xmm3, xmmword ptr kShuffleAlpha // alpha
+ pand xmm2, xmm6 // _r_b
+ paddw xmm3, xmm7 // 256 - alpha
+ pmullw xmm2, xmm3 // _r_b * alpha
+ movdqu xmm1, [esi] // _a_g
+ lea esi, [esi + 16]
+ psrlw xmm1, 8 // _a_g
+ por xmm0, xmm4 // set alpha to 255
+ pmullw xmm1, xmm3 // _a_g * alpha
+ psrlw xmm2, 8 // _r_b convert to 8 bits again
+ paddusb xmm0, xmm2 // + src argb
+ pand xmm1, xmm5 // a_g_ convert to 8 bits again
+ paddusb xmm0, xmm1 // + src argb
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jge convertloop4
+
+ convertloop4b:
+ add ecx, 4 - 1
+ jl convertloop1b
+
+ // 1 pixel loop.
+ convertloop1:
+ movd xmm3, [eax] // src argb
+ lea eax, [eax + 4]
+ movdqa xmm0, xmm3 // src argb
+ pxor xmm3, xmm4 // ~alpha
+ movd xmm2, [esi] // _r_b
+ pshufb xmm3, xmmword ptr kShuffleAlpha // alpha
+ pand xmm2, xmm6 // _r_b
+ paddw xmm3, xmm7 // 256 - alpha
+ pmullw xmm2, xmm3 // _r_b * alpha
+ movd xmm1, [esi] // _a_g
+ lea esi, [esi + 4]
+ psrlw xmm1, 8 // _a_g
+ por xmm0, xmm4 // set alpha to 255
+ pmullw xmm1, xmm3 // _a_g * alpha
+ psrlw xmm2, 8 // _r_b convert to 8 bits again
+ paddusb xmm0, xmm2 // + src argb
+ pand xmm1, xmm5 // a_g_ convert to 8 bits again
+ paddusb xmm0, xmm1 // + src argb
+ movd [edx], xmm0
+ lea edx, [edx + 4]
+ sub ecx, 1
+ jge convertloop1
+
+ convertloop1b:
+ pop esi
+ ret
+ }
+}
+#endif // HAS_ARGBBLENDROW_SSSE3
+
+#ifdef HAS_ARGBATTENUATEROW_SSSE3
+// Shuffle table duplicating alpha.
+static const uvec8 kShuffleAlpha0 = {
+ 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
+};
+static const uvec8 kShuffleAlpha1 = {
+ 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
+ 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
+};
+__declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb0
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ pcmpeqb xmm3, xmm3 // generate mask 0xff000000
+ pslld xmm3, 24
+ movdqa xmm4, xmmword ptr kShuffleAlpha0
+ movdqa xmm5, xmmword ptr kShuffleAlpha1
+
+ convertloop:
+ movdqu xmm0, [eax] // read 4 pixels
+ pshufb xmm0, xmm4 // isolate first 2 alphas
+ movdqu xmm1, [eax] // read 4 pixels
+ punpcklbw xmm1, xmm1 // first 2 pixel rgbs
+ pmulhuw xmm0, xmm1 // rgb * a
+ movdqu xmm1, [eax] // read 4 pixels
+ pshufb xmm1, xmm5 // isolate next 2 alphas
+ movdqu xmm2, [eax] // read 4 pixels
+ punpckhbw xmm2, xmm2 // next 2 pixel rgbs
+ pmulhuw xmm1, xmm2 // rgb * a
+ movdqu xmm2, [eax] // mask original alpha
+ lea eax, [eax + 16]
+ pand xmm2, xmm3
+ psrlw xmm0, 8
+ psrlw xmm1, 8
+ packuswb xmm0, xmm1
+ por xmm0, xmm2 // copy original alpha
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg convertloop
+
+ ret
+ }
+}
+#endif // HAS_ARGBATTENUATEROW_SSSE3
+
+#ifdef HAS_ARGBATTENUATEROW_AVX2
+// Shuffle table duplicating alpha.
+static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u,
+ 128u, 128u, 14u, 15u, 14u, 15u,
+ 14u, 15u, 128u, 128u};
+__declspec(naked) void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb0
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ vbroadcastf128 ymm4, xmmword ptr kShuffleAlpha_AVX2
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000
+ vpslld ymm5, ymm5, 24
+
+ convertloop:
+ vmovdqu ymm6, [eax] // read 8 pixels.
+ vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
+ vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
+ vpshufb ymm2, ymm0, ymm4 // low 4 alphas
+ vpshufb ymm3, ymm1, ymm4 // high 4 alphas
+ vpmulhuw ymm0, ymm0, ymm2 // rgb * a
+ vpmulhuw ymm1, ymm1, ymm3 // rgb * a
+ vpand ymm6, ymm6, ymm5 // isolate alpha
+ vpsrlw ymm0, ymm0, 8
+ vpsrlw ymm1, ymm1, 8
+ vpackuswb ymm0, ymm0, ymm1 // unmutated.
+ vpor ymm0, ymm0, ymm6 // copy original alpha
+ vmovdqu [eax + edx], ymm0
+ lea eax, [eax + 32]
+ sub ecx, 8
+ jg convertloop
+
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBATTENUATEROW_AVX2
+
+#ifdef HAS_ARGBUNATTENUATEROW_SSE2
+// Unattenuate 4 pixels at a time.
+__declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ push ebx
+ push esi
+ push edi
+ mov eax, [esp + 12 + 4] // src_argb
+ mov edx, [esp + 12 + 8] // dst_argb
+ mov ecx, [esp + 12 + 12] // width
+ lea ebx, fixed_invtbl8
+
+ convertloop:
+ movdqu xmm0, [eax] // read 4 pixels
+ movzx esi, byte ptr [eax + 3] // first alpha
+ movzx edi, byte ptr [eax + 7] // second alpha
+ punpcklbw xmm0, xmm0 // first 2
+ movd xmm2, dword ptr [ebx + esi * 4]
+ movd xmm3, dword ptr [ebx + edi * 4]
+ pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a
+ pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
+ movlhps xmm2, xmm3
+ pmulhuw xmm0, xmm2 // rgb * a
+
+ movdqu xmm1, [eax] // read 4 pixels
+ movzx esi, byte ptr [eax + 11] // third alpha
+ movzx edi, byte ptr [eax + 15] // forth alpha
+ punpckhbw xmm1, xmm1 // next 2
+ movd xmm2, dword ptr [ebx + esi * 4]
+ movd xmm3, dword ptr [ebx + edi * 4]
+ pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words
+ pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
+ movlhps xmm2, xmm3
+ pmulhuw xmm1, xmm2 // rgb * a
+ lea eax, [eax + 16]
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg convertloop
+
+ pop edi
+ pop esi
+ pop ebx
+ ret
+ }
+}
+#endif // HAS_ARGBUNATTENUATEROW_SSE2
+
+#ifdef HAS_ARGBUNATTENUATEROW_AVX2
+// Shuffle table duplicating alpha.
+static const uvec8 kUnattenShuffleAlpha_AVX2 = {
+ 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u};
+// TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
+// USE_GATHER is not on by default, due to being a slow instruction.
+#ifdef USE_GATHER
+__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb0
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ sub edx, eax
+ vbroadcastf128 ymm4, xmmword ptr kUnattenShuffleAlpha_AVX2
+
+ convertloop:
+ vmovdqu ymm6, [eax] // read 8 pixels.
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xffffffff for gather.
+ vpsrld ymm2, ymm6, 24 // alpha in low 8 bits.
+ vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
+ vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
+ vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm5 // ymm5 cleared. 1, a
+ vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. mutated. 1, 1, a, a
+ vpunpckhwd ymm3, ymm3, ymm3 // high 4 inverted alphas. mutated.
+ vpshufb ymm2, ymm2, ymm4 // replicate low 4 alphas. 1, a, a, a
+ vpshufb ymm3, ymm3, ymm4 // replicate high 4 alphas
+ vpmulhuw ymm0, ymm0, ymm2 // rgb * ia
+ vpmulhuw ymm1, ymm1, ymm3 // rgb * ia
+ vpackuswb ymm0, ymm0, ymm1 // unmutated.
+ vmovdqu [eax + edx], ymm0
+ lea eax, [eax + 32]
+ sub ecx, 8
+ jg convertloop
+
+ vzeroupper
+ ret
+ }
+}
+#else // USE_GATHER
+__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+
+ push ebx
+ push esi
+ push edi
+ mov eax, [esp + 12 + 4] // src_argb
+ mov edx, [esp + 12 + 8] // dst_argb
+ mov ecx, [esp + 12 + 12] // width
+ sub edx, eax
+ lea ebx, fixed_invtbl8
+ vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2
+
+ convertloop:
+ // replace VPGATHER
+ movzx esi, byte ptr [eax + 3] // alpha0
+ movzx edi, byte ptr [eax + 7] // alpha1
+ vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a0]
+ vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a1]
+ movzx esi, byte ptr [eax + 11] // alpha2
+ movzx edi, byte ptr [eax + 15] // alpha3
+ vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0]
+ vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a2]
+ vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a3]
+ movzx esi, byte ptr [eax + 19] // alpha4
+ movzx edi, byte ptr [eax + 23] // alpha5
+ vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2]
+ vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a4]
+ vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a5]
+ movzx esi, byte ptr [eax + 27] // alpha6
+ movzx edi, byte ptr [eax + 31] // alpha7
+ vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4]
+ vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a6]
+ vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a7]
+ vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6]
+ vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0]
+ vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4]
+ vinserti128 ymm3, ymm3, xmm0, 1 // [1,a7,1,a6,1,a5,1,a4,1,a3,1,a2,1,a1,1,a0]
+ // end of VPGATHER
+
+ vmovdqu ymm6, [eax] // read 8 pixels.
+ vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
+ vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
+ vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. mutated. 1, 1, a, a
+ vpunpckhwd ymm3, ymm3, ymm3 // high 4 inverted alphas. mutated.
+ vpshufb ymm2, ymm2, ymm5 // replicate low 4 alphas. 1, a, a, a
+ vpshufb ymm3, ymm3, ymm5 // replicate high 4 alphas
+ vpmulhuw ymm0, ymm0, ymm2 // rgb * ia
+ vpmulhuw ymm1, ymm1, ymm3 // rgb * ia
+ vpackuswb ymm0, ymm0, ymm1 // unmutated.
+ vmovdqu [eax + edx], ymm0
+ lea eax, [eax + 32]
+ sub ecx, 8
+ jg convertloop
+
+ pop edi
+ pop esi
+ pop ebx
+ vzeroupper
+ ret
+ }
+}
+#endif // USE_GATHER
+#endif // HAS_ARGBATTENUATEROW_AVX2
+
+#ifdef HAS_ARGBGRAYROW_SSSE3
+// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels.
+__declspec(naked) void ARGBGrayRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src_argb */
+ mov edx, [esp + 8] /* dst_argb */
+ mov ecx, [esp + 12] /* width */
+ movdqa xmm4, xmmword ptr kARGBToYJ
+ movdqa xmm5, xmmword ptr kAddYJ64
+
+ convertloop:
+ movdqu xmm0, [eax] // G
+ movdqu xmm1, [eax + 16]
+ pmaddubsw xmm0, xmm4
+ pmaddubsw xmm1, xmm4
+ phaddw xmm0, xmm1
+ paddw xmm0, xmm5 // Add .5 for rounding.
+ psrlw xmm0, 7
+ packuswb xmm0, xmm0 // 8 G bytes
+ movdqu xmm2, [eax] // A
+ movdqu xmm3, [eax + 16]
+ lea eax, [eax + 32]
+ psrld xmm2, 24
+ psrld xmm3, 24
+ packuswb xmm2, xmm3
+ packuswb xmm2, xmm2 // 8 A bytes
+ movdqa xmm3, xmm0 // Weave into GG, GA, then GGGA
+ punpcklbw xmm0, xmm0 // 8 GG words
+ punpcklbw xmm3, xmm2 // 8 GA words
+ movdqa xmm1, xmm0
+ punpcklwd xmm0, xmm3 // GGGA first 4
+ punpckhwd xmm1, xmm3 // GGGA next 4
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg convertloop
+ ret
+ }
+}
+#endif // HAS_ARGBGRAYROW_SSSE3
+
+#ifdef HAS_ARGBSEPIAROW_SSSE3
+// b = (r * 35 + g * 68 + b * 17) >> 7
+// g = (r * 45 + g * 88 + b * 22) >> 7
+// r = (r * 50 + g * 98 + b * 24) >> 7
+// Constant for ARGB color to sepia tone.
+static const vec8 kARGBToSepiaB = {17, 68, 35, 0, 17, 68, 35, 0,
+ 17, 68, 35, 0, 17, 68, 35, 0};
+
+static const vec8 kARGBToSepiaG = {22, 88, 45, 0, 22, 88, 45, 0,
+ 22, 88, 45, 0, 22, 88, 45, 0};
+
+static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0,
+ 24, 98, 50, 0, 24, 98, 50, 0};
+
+// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
+__declspec(naked) void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) {
+ __asm {
+ mov eax, [esp + 4] /* dst_argb */
+ mov ecx, [esp + 8] /* width */
+ movdqa xmm2, xmmword ptr kARGBToSepiaB
+ movdqa xmm3, xmmword ptr kARGBToSepiaG
+ movdqa xmm4, xmmword ptr kARGBToSepiaR
+
+ convertloop:
+ movdqu xmm0, [eax] // B
+ movdqu xmm6, [eax + 16]
+ pmaddubsw xmm0, xmm2
+ pmaddubsw xmm6, xmm2
+ phaddw xmm0, xmm6
+ psrlw xmm0, 7
+ packuswb xmm0, xmm0 // 8 B values
+ movdqu xmm5, [eax] // G
+ movdqu xmm1, [eax + 16]
+ pmaddubsw xmm5, xmm3
+ pmaddubsw xmm1, xmm3
+ phaddw xmm5, xmm1
+ psrlw xmm5, 7
+ packuswb xmm5, xmm5 // 8 G values
+ punpcklbw xmm0, xmm5 // 8 BG values
+ movdqu xmm5, [eax] // R
+ movdqu xmm1, [eax + 16]
+ pmaddubsw xmm5, xmm4
+ pmaddubsw xmm1, xmm4
+ phaddw xmm5, xmm1
+ psrlw xmm5, 7
+ packuswb xmm5, xmm5 // 8 R values
+ movdqu xmm6, [eax] // A
+ movdqu xmm1, [eax + 16]
+ psrld xmm6, 24
+ psrld xmm1, 24
+ packuswb xmm6, xmm1
+ packuswb xmm6, xmm6 // 8 A values
+ punpcklbw xmm5, xmm6 // 8 RA values
+ movdqa xmm1, xmm0 // Weave BG, RA together
+ punpcklwd xmm0, xmm5 // BGRA first 4
+ punpckhwd xmm1, xmm5 // BGRA next 4
+ movdqu [eax], xmm0
+ movdqu [eax + 16], xmm1
+ lea eax, [eax + 32]
+ sub ecx, 8
+ jg convertloop
+ ret
+ }
+}
+#endif // HAS_ARGBSEPIAROW_SSSE3
+
+#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
+// Tranform 8 ARGB pixels (32 bytes) with color matrix.
+// Same as Sepia except matrix is provided.
+// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
+// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
+__declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const int8_t* matrix_argb,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src_argb */
+ mov edx, [esp + 8] /* dst_argb */
+ mov ecx, [esp + 12] /* matrix_argb */
+ movdqu xmm5, [ecx]
+ pshufd xmm2, xmm5, 0x00
+ pshufd xmm3, xmm5, 0x55
+ pshufd xmm4, xmm5, 0xaa
+ pshufd xmm5, xmm5, 0xff
+ mov ecx, [esp + 16] /* width */
+
+ convertloop:
+ movdqu xmm0, [eax] // B
+ movdqu xmm7, [eax + 16]
+ pmaddubsw xmm0, xmm2
+ pmaddubsw xmm7, xmm2
+ movdqu xmm6, [eax] // G
+ movdqu xmm1, [eax + 16]
+ pmaddubsw xmm6, xmm3
+ pmaddubsw xmm1, xmm3
+ phaddsw xmm0, xmm7 // B
+ phaddsw xmm6, xmm1 // G
+ psraw xmm0, 6 // B
+ psraw xmm6, 6 // G
+ packuswb xmm0, xmm0 // 8 B values
+ packuswb xmm6, xmm6 // 8 G values
+ punpcklbw xmm0, xmm6 // 8 BG values
+ movdqu xmm1, [eax] // R
+ movdqu xmm7, [eax + 16]
+ pmaddubsw xmm1, xmm4
+ pmaddubsw xmm7, xmm4
+ phaddsw xmm1, xmm7 // R
+ movdqu xmm6, [eax] // A
+ movdqu xmm7, [eax + 16]
+ pmaddubsw xmm6, xmm5
+ pmaddubsw xmm7, xmm5
+ phaddsw xmm6, xmm7 // A
+ psraw xmm1, 6 // R
+ psraw xmm6, 6 // A
+ packuswb xmm1, xmm1 // 8 R values
+ packuswb xmm6, xmm6 // 8 A values
+ punpcklbw xmm1, xmm6 // 8 RA values
+ movdqa xmm6, xmm0 // Weave BG, RA together
+ punpcklwd xmm0, xmm1 // BGRA first 4
+ punpckhwd xmm6, xmm1 // BGRA next 4
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm6
+ lea eax, [eax + 32]
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg convertloop
+ ret
+ }
+}
+#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
+
+#ifdef HAS_ARGBQUANTIZEROW_SSE2
+// Quantize 4 ARGB pixels (16 bytes).
+__declspec(naked) void ARGBQuantizeRow_SSE2(uint8_t* dst_argb,
+ int scale,
+ int interval_size,
+ int interval_offset,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* dst_argb */
+ movd xmm2, [esp + 8] /* scale */
+ movd xmm3, [esp + 12] /* interval_size */
+ movd xmm4, [esp + 16] /* interval_offset */
+ mov ecx, [esp + 20] /* width */
+ pshuflw xmm2, xmm2, 040h
+ pshufd xmm2, xmm2, 044h
+ pshuflw xmm3, xmm3, 040h
+ pshufd xmm3, xmm3, 044h
+ pshuflw xmm4, xmm4, 040h
+ pshufd xmm4, xmm4, 044h
+ pxor xmm5, xmm5 // constant 0
+ pcmpeqb xmm6, xmm6 // generate mask 0xff000000
+ pslld xmm6, 24
+
+ convertloop:
+ movdqu xmm0, [eax] // read 4 pixels
+ punpcklbw xmm0, xmm5 // first 2 pixels
+ pmulhuw xmm0, xmm2 // pixel * scale >> 16
+ movdqu xmm1, [eax] // read 4 pixels
+ punpckhbw xmm1, xmm5 // next 2 pixels
+ pmulhuw xmm1, xmm2
+ pmullw xmm0, xmm3 // * interval_size
+ movdqu xmm7, [eax] // read 4 pixels
+ pmullw xmm1, xmm3
+ pand xmm7, xmm6 // mask alpha
+ paddw xmm0, xmm4 // + interval_size / 2
+ paddw xmm1, xmm4
+ packuswb xmm0, xmm1
+ por xmm0, xmm7
+ movdqu [eax], xmm0
+ lea eax, [eax + 16]
+ sub ecx, 4
+ jg convertloop
+ ret
+ }
+}
+#endif // HAS_ARGBQUANTIZEROW_SSE2
+
+#ifdef HAS_ARGBSHADEROW_SSE2
+// Shade 4 pixels at a time by specified value.
+__declspec(naked) void ARGBShadeRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // width
+ movd xmm2, [esp + 16] // value
+ punpcklbw xmm2, xmm2
+ punpcklqdq xmm2, xmm2
+
+ convertloop:
+ movdqu xmm0, [eax] // read 4 pixels
+ lea eax, [eax + 16]
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm0 // first 2
+ punpckhbw xmm1, xmm1 // next 2
+ pmulhuw xmm0, xmm2 // argb * value
+ pmulhuw xmm1, xmm2 // argb * value
+ psrlw xmm0, 8
+ psrlw xmm1, 8
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg convertloop
+
+ ret
+ }
+}
+#endif // HAS_ARGBSHADEROW_SSE2
+
+#ifdef HAS_ARGBMULTIPLYROW_SSE2
+// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
+__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_argb0
+ mov esi, [esp + 4 + 8] // src_argb1
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // width
+ pxor xmm5, xmm5 // constant 0
+
+ convertloop:
+ movdqu xmm0, [eax] // read 4 pixels from src_argb0
+ movdqu xmm2, [esi] // read 4 pixels from src_argb1
+ movdqu xmm1, xmm0
+ movdqu xmm3, xmm2
+ punpcklbw xmm0, xmm0 // first 2
+ punpckhbw xmm1, xmm1 // next 2
+ punpcklbw xmm2, xmm5 // first 2
+ punpckhbw xmm3, xmm5 // next 2
+ pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2
+ pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2
+ lea eax, [eax + 16]
+ lea esi, [esi + 16]
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg convertloop
+
+ pop esi
+ ret
+ }
+}
+#endif // HAS_ARGBMULTIPLYROW_SSE2
+
+#ifdef HAS_ARGBADDROW_SSE2
+// Add 2 rows of ARGB pixels together, 4 pixels at a time.
+// TODO(fbarchard): Port this to posix, neon and other math functions.
+__declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_argb0
+ mov esi, [esp + 4 + 8] // src_argb1
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // width
+
+ sub ecx, 4
+ jl convertloop49
+
+ convertloop4:
+ movdqu xmm0, [eax] // read 4 pixels from src_argb0
+ lea eax, [eax + 16]
+ movdqu xmm1, [esi] // read 4 pixels from src_argb1
+ lea esi, [esi + 16]
+ paddusb xmm0, xmm1 // src_argb0 + src_argb1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jge convertloop4
+
+ convertloop49:
+ add ecx, 4 - 1
+ jl convertloop19
+
+ convertloop1:
+ movd xmm0, [eax] // read 1 pixels from src_argb0
+ lea eax, [eax + 4]
+ movd xmm1, [esi] // read 1 pixels from src_argb1
+ lea esi, [esi + 4]
+ paddusb xmm0, xmm1 // src_argb0 + src_argb1
+ movd [edx], xmm0
+ lea edx, [edx + 4]
+ sub ecx, 1
+ jge convertloop1
+
+ convertloop19:
+ pop esi
+ ret
+ }
+}
+#endif // HAS_ARGBADDROW_SSE2
+
+#ifdef HAS_ARGBSUBTRACTROW_SSE2
+// Subtract 2 rows of ARGB pixels together, 4 pixels at a time.
+__declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_argb0
+ mov esi, [esp + 4 + 8] // src_argb1
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // width
+
+ convertloop:
+ movdqu xmm0, [eax] // read 4 pixels from src_argb0
+ lea eax, [eax + 16]
+ movdqu xmm1, [esi] // read 4 pixels from src_argb1
+ lea esi, [esi + 16]
+ psubusb xmm0, xmm1 // src_argb0 - src_argb1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg convertloop
+
+ pop esi
+ ret
+ }
+}
+#endif // HAS_ARGBSUBTRACTROW_SSE2
+
+#ifdef HAS_ARGBMULTIPLYROW_AVX2
+// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
+__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_argb0
+ mov esi, [esp + 4 + 8] // src_argb1
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // width
+ vpxor ymm5, ymm5, ymm5 // constant 0
+
+ convertloop:
+ vmovdqu ymm1, [eax] // read 8 pixels from src_argb0
+ lea eax, [eax + 32]
+ vmovdqu ymm3, [esi] // read 8 pixels from src_argb1
+ lea esi, [esi + 32]
+ vpunpcklbw ymm0, ymm1, ymm1 // low 4
+ vpunpckhbw ymm1, ymm1, ymm1 // high 4
+ vpunpcklbw ymm2, ymm3, ymm5 // low 4
+ vpunpckhbw ymm3, ymm3, ymm5 // high 4
+ vpmulhuw ymm0, ymm0, ymm2 // src_argb0 * src_argb1 low 4
+ vpmulhuw ymm1, ymm1, ymm3 // src_argb0 * src_argb1 high 4
+ vpackuswb ymm0, ymm0, ymm1
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg convertloop
+
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBMULTIPLYROW_AVX2
+
+#ifdef HAS_ARGBADDROW_AVX2
+// Add 2 rows of ARGB pixels together, 8 pixels at a time.
+__declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_argb0
+ mov esi, [esp + 4 + 8] // src_argb1
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // width
+
+ convertloop:
+ vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
+ lea eax, [eax + 32]
+ vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1
+ lea esi, [esi + 32]
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg convertloop
+
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBADDROW_AVX2
+
+#ifdef HAS_ARGBSUBTRACTROW_AVX2
+// Subtract 2 rows of ARGB pixels together, 8 pixels at a time.
+__declspec(naked) void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_argb0
+ mov esi, [esp + 4 + 8] // src_argb1
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // width
+
+ convertloop:
+ vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
+ lea eax, [eax + 32]
+ vpsubusb ymm0, ymm0, [esi] // src_argb0 - src_argb1
+ lea esi, [esi + 32]
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg convertloop
+
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBSUBTRACTROW_AVX2
+
+#ifdef HAS_SOBELXROW_SSE2
+// SobelX as a matrix is
+// -1 0 1
+// -2 0 2
+// -1 0 1
+__declspec(naked) void SobelXRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ const uint8_t* src_y2,
+ uint8_t* dst_sobelx,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_y0
+ mov esi, [esp + 8 + 8] // src_y1
+ mov edi, [esp + 8 + 12] // src_y2
+ mov edx, [esp + 8 + 16] // dst_sobelx
+ mov ecx, [esp + 8 + 20] // width
+ sub esi, eax
+ sub edi, eax
+ sub edx, eax
+ pxor xmm5, xmm5 // constant 0
+
+ convertloop:
+ movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0]
+ movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm1, xmm5
+ psubw xmm0, xmm1
+ movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
+ movq xmm2, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2]
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ psubw xmm1, xmm2
+ movq xmm2, qword ptr [eax + edi] // read 8 pixels from src_y2[0]
+ movq xmm3, qword ptr [eax + edi + 2] // read 8 pixels from src_y2[2]
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm3, xmm5
+ psubw xmm2, xmm3
+ paddw xmm0, xmm2
+ paddw xmm0, xmm1
+ paddw xmm0, xmm1
+ pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
+ psubw xmm1, xmm0
+ pmaxsw xmm0, xmm1
+ packuswb xmm0, xmm0
+ movq qword ptr [eax + edx], xmm0
+ lea eax, [eax + 8]
+ sub ecx, 8
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+#endif // HAS_SOBELXROW_SSE2
+
+#ifdef HAS_SOBELYROW_SSE2
+// SobelY as a matrix is
+// -1 -2 -1
+// 0 0 0
+// 1 2 1
+__declspec(naked) void SobelYRow_SSE2(const uint8_t* src_y0,
+ const uint8_t* src_y1,
+ uint8_t* dst_sobely,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_y0
+ mov esi, [esp + 4 + 8] // src_y1
+ mov edx, [esp + 4 + 12] // dst_sobely
+ mov ecx, [esp + 4 + 16] // width
+ sub esi, eax
+ sub edx, eax
+ pxor xmm5, xmm5 // constant 0
+
+ convertloop:
+ movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0]
+ movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm1, xmm5
+ psubw xmm0, xmm1
+ movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1]
+ movq xmm2, qword ptr [eax + esi + 1] // read 8 pixels from src_y1[1]
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ psubw xmm1, xmm2
+ movq xmm2, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
+ movq xmm3, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2]
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm3, xmm5
+ psubw xmm2, xmm3
+ paddw xmm0, xmm2
+ paddw xmm0, xmm1
+ paddw xmm0, xmm1
+ pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
+ psubw xmm1, xmm0
+ pmaxsw xmm0, xmm1
+ packuswb xmm0, xmm0
+ movq qword ptr [eax + edx], xmm0
+ lea eax, [eax + 8]
+ sub ecx, 8
+ jg convertloop
+
+ pop esi
+ ret
+ }
+}
+#endif // HAS_SOBELYROW_SSE2
+
+#ifdef HAS_SOBELROW_SSE2
+// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
+// A = 255
+// R = Sobel
+// G = Sobel
+// B = Sobel
+__declspec(naked) void SobelRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_sobelx
+ mov esi, [esp + 4 + 8] // src_sobely
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // width
+ sub esi, eax
+ pcmpeqb xmm5, xmm5 // alpha 255
+ pslld xmm5, 24 // 0xff000000
+
+ convertloop:
+ movdqu xmm0, [eax] // read 16 pixels src_sobelx
+ movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
+ lea eax, [eax + 16]
+ paddusb xmm0, xmm1 // sobel = sobelx + sobely
+ movdqa xmm2, xmm0 // GG
+ punpcklbw xmm2, xmm0 // First 8
+ punpckhbw xmm0, xmm0 // Next 8
+ movdqa xmm1, xmm2 // GGGG
+ punpcklwd xmm1, xmm2 // First 4
+ punpckhwd xmm2, xmm2 // Next 4
+ por xmm1, xmm5 // GGGA
+ por xmm2, xmm5
+ movdqa xmm3, xmm0 // GGGG
+ punpcklwd xmm3, xmm0 // Next 4
+ punpckhwd xmm0, xmm0 // Last 4
+ por xmm3, xmm5 // GGGA
+ por xmm0, xmm5
+ movdqu [edx], xmm1
+ movdqu [edx + 16], xmm2
+ movdqu [edx + 32], xmm3
+ movdqu [edx + 48], xmm0
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+
+ pop esi
+ ret
+ }
+}
+#endif // HAS_SOBELROW_SSE2
+
+#ifdef HAS_SOBELTOPLANEROW_SSE2
+// Adds Sobel X and Sobel Y and stores Sobel into a plane.
+__declspec(naked) void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_y,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_sobelx
+ mov esi, [esp + 4 + 8] // src_sobely
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // width
+ sub esi, eax
+
+ convertloop:
+ movdqu xmm0, [eax] // read 16 pixels src_sobelx
+ movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
+ lea eax, [eax + 16]
+ paddusb xmm0, xmm1 // sobel = sobelx + sobely
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg convertloop
+
+ pop esi
+ ret
+ }
+}
+#endif // HAS_SOBELTOPLANEROW_SSE2
+
+#ifdef HAS_SOBELXYROW_SSE2
+// Mixes Sobel X, Sobel Y and Sobel into ARGB.
+// A = 255
+// R = Sobel X
+// G = Sobel
+// B = Sobel Y
+__declspec(naked) void SobelXYRow_SSE2(const uint8_t* src_sobelx,
+ const uint8_t* src_sobely,
+ uint8_t* dst_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_sobelx
+ mov esi, [esp + 4 + 8] // src_sobely
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // width
+ sub esi, eax
+ pcmpeqb xmm5, xmm5 // alpha 255
+
+ convertloop:
+ movdqu xmm0, [eax] // read 16 pixels src_sobelx
+ movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
+ lea eax, [eax + 16]
+ movdqa xmm2, xmm0
+ paddusb xmm2, xmm1 // sobel = sobelx + sobely
+ movdqa xmm3, xmm0 // XA
+ punpcklbw xmm3, xmm5
+ punpckhbw xmm0, xmm5
+ movdqa xmm4, xmm1 // YS
+ punpcklbw xmm4, xmm2
+ punpckhbw xmm1, xmm2
+ movdqa xmm6, xmm4 // YSXA
+ punpcklwd xmm6, xmm3 // First 4
+ punpckhwd xmm4, xmm3 // Next 4
+ movdqa xmm7, xmm1 // YSXA
+ punpcklwd xmm7, xmm0 // Next 4
+ punpckhwd xmm1, xmm0 // Last 4
+ movdqu [edx], xmm6
+ movdqu [edx + 16], xmm4
+ movdqu [edx + 32], xmm7
+ movdqu [edx + 48], xmm1
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg convertloop
+
+ pop esi
+ ret
+ }
+}
+#endif // HAS_SOBELXYROW_SSE2
+
+#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+// Consider float CumulativeSum.
+// Consider calling CumulativeSum one row at time as needed.
+// Consider circular CumulativeSum buffer of radius * 2 + 1 height.
+// Convert cumulative sum for an area to an average for 1 pixel.
+// topleft is pointer to top left of CumulativeSum buffer for area.
+// botleft is pointer to bottom left of CumulativeSum buffer.
+// width is offset from left to right of area in CumulativeSum buffer measured
+// in number of ints.
+// area is the number of pixels in the area being averaged.
+// dst points to pixel to store result to.
+// count is number of averaged pixels to produce.
+// Does 4 pixels at a time.
+// This function requires alignment on accumulation buffer pointers.
+void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
+ const int32_t* botleft,
+ int width,
+ int area,
+ uint8_t* dst,
+ int count) {
+ __asm {
+ mov eax, topleft // eax topleft
+ mov esi, botleft // esi botleft
+ mov edx, width
+ movd xmm5, area
+ mov edi, dst
+ mov ecx, count
+ cvtdq2ps xmm5, xmm5
+ rcpss xmm4, xmm5 // 1.0f / area
+ pshufd xmm4, xmm4, 0
+ sub ecx, 4
+ jl l4b
+
+ cmp area, 128 // 128 pixels will not overflow 15 bits.
+ ja l4
+
+ pshufd xmm5, xmm5, 0 // area
+ pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0
+ psrld xmm6, 16
+ cvtdq2ps xmm6, xmm6
+ addps xmm5, xmm6 // (65536.0 + area - 1)
+ mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area
+ cvtps2dq xmm5, xmm5 // 0.16 fixed point
+ packssdw xmm5, xmm5 // 16 bit shorts
+
+ // 4 pixel loop small blocks.
+ s4:
+ // top left
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+
+ // - top right
+ psubd xmm0, [eax + edx * 4]
+ psubd xmm1, [eax + edx * 4 + 16]
+ psubd xmm2, [eax + edx * 4 + 32]
+ psubd xmm3, [eax + edx * 4 + 48]
+ lea eax, [eax + 64]
+
+ // - bottom left
+ psubd xmm0, [esi]
+ psubd xmm1, [esi + 16]
+ psubd xmm2, [esi + 32]
+ psubd xmm3, [esi + 48]
+
+ // + bottom right
+ paddd xmm0, [esi + edx * 4]
+ paddd xmm1, [esi + edx * 4 + 16]
+ paddd xmm2, [esi + edx * 4 + 32]
+ paddd xmm3, [esi + edx * 4 + 48]
+ lea esi, [esi + 64]
+
+ packssdw xmm0, xmm1 // pack 4 pixels into 2 registers
+ packssdw xmm2, xmm3
+
+ pmulhuw xmm0, xmm5
+ pmulhuw xmm2, xmm5
+
+ packuswb xmm0, xmm2
+ movdqu [edi], xmm0
+ lea edi, [edi + 16]
+ sub ecx, 4
+ jge s4
+
+ jmp l4b
+
+ // 4 pixel loop
+ l4:
+ // top left
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + 32]
+ movdqu xmm3, [eax + 48]
+
+ // - top right
+ psubd xmm0, [eax + edx * 4]
+ psubd xmm1, [eax + edx * 4 + 16]
+ psubd xmm2, [eax + edx * 4 + 32]
+ psubd xmm3, [eax + edx * 4 + 48]
+ lea eax, [eax + 64]
+
+ // - bottom left
+ psubd xmm0, [esi]
+ psubd xmm1, [esi + 16]
+ psubd xmm2, [esi + 32]
+ psubd xmm3, [esi + 48]
+
+ // + bottom right
+ paddd xmm0, [esi + edx * 4]
+ paddd xmm1, [esi + edx * 4 + 16]
+ paddd xmm2, [esi + edx * 4 + 32]
+ paddd xmm3, [esi + edx * 4 + 48]
+ lea esi, [esi + 64]
+
+ cvtdq2ps xmm0, xmm0 // Average = Sum * 1 / Area
+ cvtdq2ps xmm1, xmm1
+ mulps xmm0, xmm4
+ mulps xmm1, xmm4
+ cvtdq2ps xmm2, xmm2
+ cvtdq2ps xmm3, xmm3
+ mulps xmm2, xmm4
+ mulps xmm3, xmm4
+ cvtps2dq xmm0, xmm0
+ cvtps2dq xmm1, xmm1
+ cvtps2dq xmm2, xmm2
+ cvtps2dq xmm3, xmm3
+ packssdw xmm0, xmm1
+ packssdw xmm2, xmm3
+ packuswb xmm0, xmm2
+ movdqu [edi], xmm0
+ lea edi, [edi + 16]
+ sub ecx, 4
+ jge l4
+
+ l4b:
+ add ecx, 4 - 1
+ jl l1b
+
+ // 1 pixel loop
+ l1:
+ movdqu xmm0, [eax]
+ psubd xmm0, [eax + edx * 4]
+ lea eax, [eax + 16]
+ psubd xmm0, [esi]
+ paddd xmm0, [esi + edx * 4]
+ lea esi, [esi + 16]
+ cvtdq2ps xmm0, xmm0
+ mulps xmm0, xmm4
+ cvtps2dq xmm0, xmm0
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd dword ptr [edi], xmm0
+ lea edi, [edi + 4]
+ sub ecx, 1
+ jge l1
+ l1b:
+ }
+}
+#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+
+#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
+// Creates a table of cumulative sums where each value is a sum of all values
+// above and to the left of the value.
+void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
+ int32_t* cumsum,
+ const int32_t* previous_cumsum,
+ int width) {
+ __asm {
+ mov eax, row
+ mov edx, cumsum
+ mov esi, previous_cumsum
+ mov ecx, width
+ pxor xmm0, xmm0
+ pxor xmm1, xmm1
+
+ sub ecx, 4
+ jl l4b
+ test edx, 15
+ jne l4b
+
+ // 4 pixel loop
+ l4:
+ movdqu xmm2, [eax] // 4 argb pixels 16 bytes.
+ lea eax, [eax + 16]
+ movdqa xmm4, xmm2
+
+ punpcklbw xmm2, xmm1
+ movdqa xmm3, xmm2
+ punpcklwd xmm2, xmm1
+ punpckhwd xmm3, xmm1
+
+ punpckhbw xmm4, xmm1
+ movdqa xmm5, xmm4
+ punpcklwd xmm4, xmm1
+ punpckhwd xmm5, xmm1
+
+ paddd xmm0, xmm2
+ movdqu xmm2, [esi] // previous row above.
+ paddd xmm2, xmm0
+
+ paddd xmm0, xmm3
+ movdqu xmm3, [esi + 16]
+ paddd xmm3, xmm0
+
+ paddd xmm0, xmm4
+ movdqu xmm4, [esi + 32]
+ paddd xmm4, xmm0
+
+ paddd xmm0, xmm5
+ movdqu xmm5, [esi + 48]
+ lea esi, [esi + 64]
+ paddd xmm5, xmm0
+
+ movdqu [edx], xmm2
+ movdqu [edx + 16], xmm3
+ movdqu [edx + 32], xmm4
+ movdqu [edx + 48], xmm5
+
+ lea edx, [edx + 64]
+ sub ecx, 4
+ jge l4
+
+ l4b:
+ add ecx, 4 - 1
+ jl l1b
+
+ // 1 pixel loop
+ l1:
+ movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes.
+ lea eax, [eax + 4]
+ punpcklbw xmm2, xmm1
+ punpcklwd xmm2, xmm1
+ paddd xmm0, xmm2
+ movdqu xmm2, [esi]
+ lea esi, [esi + 16]
+ paddd xmm2, xmm0
+ movdqu [edx], xmm2
+ lea edx, [edx + 16]
+ sub ecx, 1
+ jge l1
+
+ l1b:
+ }
+}
+#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
+
+#ifdef HAS_ARGBAFFINEROW_SSE2
+// Copy ARGB pixels from source image with slope to a row of destination.
+__declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb,
+ int src_argb_stride,
+ uint8_t* dst_argb,
+ const float* uv_dudv,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 12] // src_argb
+ mov esi, [esp + 16] // stride
+ mov edx, [esp + 20] // dst_argb
+ mov ecx, [esp + 24] // pointer to uv_dudv
+ movq xmm2, qword ptr [ecx] // uv
+ movq xmm7, qword ptr [ecx + 8] // dudv
+ mov ecx, [esp + 28] // width
+ shl esi, 16 // 4, stride
+ add esi, 4
+ movd xmm5, esi
+ sub ecx, 4
+ jl l4b
+
+ // setup for 4 pixel loop
+ pshufd xmm7, xmm7, 0x44 // dup dudv
+ pshufd xmm5, xmm5, 0 // dup 4, stride
+ movdqa xmm0, xmm2 // x0, y0, x1, y1
+ addps xmm0, xmm7
+ movlhps xmm2, xmm0
+ movdqa xmm4, xmm7
+ addps xmm4, xmm4 // dudv *= 2
+ movdqa xmm3, xmm2 // x2, y2, x3, y3
+ addps xmm3, xmm4
+ addps xmm4, xmm4 // dudv *= 4
+
+ // 4 pixel loop
+ l4:
+ cvttps2dq xmm0, xmm2 // x, y float to int first 2
+ cvttps2dq xmm1, xmm3 // x, y float to int next 2
+ packssdw xmm0, xmm1 // x, y as 8 shorts
+ pmaddwd xmm0, xmm5 // offsets = x * 4 + y * stride.
+ movd esi, xmm0
+ pshufd xmm0, xmm0, 0x39 // shift right
+ movd edi, xmm0
+ pshufd xmm0, xmm0, 0x39 // shift right
+ movd xmm1, [eax + esi] // read pixel 0
+ movd xmm6, [eax + edi] // read pixel 1
+ punpckldq xmm1, xmm6 // combine pixel 0 and 1
+ addps xmm2, xmm4 // x, y += dx, dy first 2
+ movq qword ptr [edx], xmm1
+ movd esi, xmm0
+ pshufd xmm0, xmm0, 0x39 // shift right
+ movd edi, xmm0
+ movd xmm6, [eax + esi] // read pixel 2
+ movd xmm0, [eax + edi] // read pixel 3
+ punpckldq xmm6, xmm0 // combine pixel 2 and 3
+ addps xmm3, xmm4 // x, y += dx, dy next 2
+ movq qword ptr 8[edx], xmm6
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jge l4
+
+ l4b:
+ add ecx, 4 - 1
+ jl l1b
+
+ // 1 pixel loop
+ l1:
+ cvttps2dq xmm0, xmm2 // x, y float to int
+ packssdw xmm0, xmm0 // x, y as shorts
+ pmaddwd xmm0, xmm5 // offset = x * 4 + y * stride
+ addps xmm2, xmm7 // x, y += dx, dy
+ movd esi, xmm0
+ movd xmm0, [eax + esi] // copy a pixel
+ movd [edx], xmm0
+ lea edx, [edx + 4]
+ sub ecx, 1
+ jge l1
+ l1b:
+ pop edi
+ pop esi
+ ret
+ }
+}
+#endif // HAS_ARGBAFFINEROW_SSE2
+
+#ifdef HAS_INTERPOLATEROW_AVX2
+// Bilinear filter 32x2 -> 32x1
+__declspec(naked) void InterpolateRow_AVX2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ __asm {
+ push esi
+ push edi
+ mov edi, [esp + 8 + 4] // dst_ptr
+ mov esi, [esp + 8 + 8] // src_ptr
+ mov edx, [esp + 8 + 12] // src_stride
+ mov ecx, [esp + 8 + 16] // dst_width
+ mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
+ // Dispatch to specialized filters if applicable.
+ cmp eax, 0
+ je xloop100 // 0 / 256. Blend 100 / 0.
+ sub edi, esi
+ cmp eax, 128
+ je xloop50 // 128 /256 is 0.50. Blend 50 / 50.
+
+ vmovd xmm0, eax // high fraction 0..255
+ neg eax
+ add eax, 256
+ vmovd xmm5, eax // low fraction 256..1
+ vpunpcklbw xmm5, xmm5, xmm0
+ vpunpcklwd xmm5, xmm5, xmm5
+ vbroadcastss ymm5, xmm5
+
+ mov eax, 0x80808080 // 128b for bias and rounding.
+ vmovd xmm4, eax
+ vbroadcastss ymm4, xmm4
+
+ xloop:
+ vmovdqu ymm0, [esi]
+ vmovdqu ymm2, [esi + edx]
+ vpunpckhbw ymm1, ymm0, ymm2 // mutates
+ vpunpcklbw ymm0, ymm0, ymm2
+ vpsubb ymm1, ymm1, ymm4 // bias to signed image
+ vpsubb ymm0, ymm0, ymm4
+ vpmaddubsw ymm1, ymm5, ymm1
+ vpmaddubsw ymm0, ymm5, ymm0
+ vpaddw ymm1, ymm1, ymm4 // unbias and round
+ vpaddw ymm0, ymm0, ymm4
+ vpsrlw ymm1, ymm1, 8
+ vpsrlw ymm0, ymm0, 8
+ vpackuswb ymm0, ymm0, ymm1 // unmutates
+ vmovdqu [esi + edi], ymm0
+ lea esi, [esi + 32]
+ sub ecx, 32
+ jg xloop
+ jmp xloop99
+
+ // Blend 50 / 50.
+ xloop50:
+ vmovdqu ymm0, [esi]
+ vpavgb ymm0, ymm0, [esi + edx]
+ vmovdqu [esi + edi], ymm0
+ lea esi, [esi + 32]
+ sub ecx, 32
+ jg xloop50
+ jmp xloop99
+
+ // Blend 100 / 0 - Copy row unchanged.
+ xloop100:
+ rep movsb
+
+ xloop99:
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_INTERPOLATEROW_AVX2
+
+// Bilinear filter 16x2 -> 16x1
+// TODO(fbarchard): Consider allowing 256 using memcpy.
+__declspec(naked) void InterpolateRow_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ __asm {
+ push esi
+ push edi
+
+ mov edi, [esp + 8 + 4] // dst_ptr
+ mov esi, [esp + 8 + 8] // src_ptr
+ mov edx, [esp + 8 + 12] // src_stride
+ mov ecx, [esp + 8 + 16] // dst_width
+ mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
+ sub edi, esi
+ // Dispatch to specialized filters if applicable.
+ cmp eax, 0
+ je xloop100 // 0 /256. Blend 100 / 0.
+ cmp eax, 128
+ je xloop50 // 128 / 256 is 0.50. Blend 50 / 50.
+
+ movd xmm0, eax // high fraction 0..255
+ neg eax
+ add eax, 256
+ movd xmm5, eax // low fraction 255..1
+ punpcklbw xmm5, xmm0
+ punpcklwd xmm5, xmm5
+ pshufd xmm5, xmm5, 0
+ mov eax, 0x80808080 // 128 for biasing image to signed.
+ movd xmm4, eax
+ pshufd xmm4, xmm4, 0x00
+
+ xloop:
+ movdqu xmm0, [esi]
+ movdqu xmm2, [esi + edx]
+ movdqu xmm1, xmm0
+ punpcklbw xmm0, xmm2
+ punpckhbw xmm1, xmm2
+ psubb xmm0, xmm4 // bias image by -128
+ psubb xmm1, xmm4
+ movdqa xmm2, xmm5
+ movdqa xmm3, xmm5
+ pmaddubsw xmm2, xmm0
+ pmaddubsw xmm3, xmm1
+ paddw xmm2, xmm4
+ paddw xmm3, xmm4
+ psrlw xmm2, 8
+ psrlw xmm3, 8
+ packuswb xmm2, xmm3
+ movdqu [esi + edi], xmm2
+ lea esi, [esi + 16]
+ sub ecx, 16
+ jg xloop
+ jmp xloop99
+
+ // Blend 50 / 50.
+ xloop50:
+ movdqu xmm0, [esi]
+ movdqu xmm1, [esi + edx]
+ pavgb xmm0, xmm1
+ movdqu [esi + edi], xmm0
+ lea esi, [esi + 16]
+ sub ecx, 16
+ jg xloop50
+ jmp xloop99
+
+ // Blend 100 / 0 - Copy row unchanged.
+ xloop100:
+ movdqu xmm0, [esi]
+ movdqu [esi + edi], xmm0
+ lea esi, [esi + 16]
+ sub ecx, 16
+ jg xloop100
+
+ xloop99:
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+__declspec(naked) void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // shuffler
+ movdqu xmm5, [ecx]
+ mov ecx, [esp + 16] // width
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ pshufb xmm0, xmm5
+ pshufb xmm1, xmm5
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg wloop
+ ret
+ }
+}
+
+#ifdef HAS_ARGBSHUFFLEROW_AVX2
+__declspec(naked) void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ mov edx, [esp + 8] // dst_argb
+ mov ecx, [esp + 12] // shuffler
+ vbroadcastf128 ymm5, [ecx] // same shuffle in high as low.
+ mov ecx, [esp + 16] // width
+
+ wloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpshufb ymm0, ymm0, ymm5
+ vpshufb ymm1, ymm1, ymm5
+ vmovdqu [edx], ymm0
+ vmovdqu [edx + 32], ymm1
+ lea edx, [edx + 64]
+ sub ecx, 16
+ jg wloop
+
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBSHUFFLEROW_AVX2
+
+// YUY2 - Macro-pixel = 2 image pixels
+// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
+
+// UYVY - Macro-pixel = 2 image pixels
+// U0Y0V0Y1
+
+__declspec(naked) void I422ToYUY2Row_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_y
+ mov esi, [esp + 8 + 8] // src_u
+ mov edx, [esp + 8 + 12] // src_v
+ mov edi, [esp + 8 + 16] // dst_frame
+ mov ecx, [esp + 8 + 20] // width
+ sub edx, esi
+
+ convertloop:
+ movq xmm2, qword ptr [esi] // U
+ movq xmm3, qword ptr [esi + edx] // V
+ lea esi, [esi + 8]
+ punpcklbw xmm2, xmm3 // UV
+ movdqu xmm0, [eax] // Y
+ lea eax, [eax + 16]
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm2 // YUYV
+ punpckhbw xmm1, xmm2
+ movdqu [edi], xmm0
+ movdqu [edi + 16], xmm1
+ lea edi, [edi + 32]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+__declspec(naked) void I422ToUYVYRow_SSE2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_frame,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_y
+ mov esi, [esp + 8 + 8] // src_u
+ mov edx, [esp + 8 + 12] // src_v
+ mov edi, [esp + 8 + 16] // dst_frame
+ mov ecx, [esp + 8 + 20] // width
+ sub edx, esi
+
+ convertloop:
+ movq xmm2, qword ptr [esi] // U
+ movq xmm3, qword ptr [esi + edx] // V
+ lea esi, [esi + 8]
+ punpcklbw xmm2, xmm3 // UV
+ movdqu xmm0, [eax] // Y
+ movdqa xmm1, xmm2
+ lea eax, [eax + 16]
+ punpcklbw xmm1, xmm0 // UYVY
+ punpckhbw xmm2, xmm0
+ movdqu [edi], xmm1
+ movdqu [edi + 16], xmm2
+ lea edi, [edi + 32]
+ sub ecx, 16
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
+__declspec(naked) void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const float* poly,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] /* src_argb */
+ mov edx, [esp + 4 + 8] /* dst_argb */
+ mov esi, [esp + 4 + 12] /* poly */
+ mov ecx, [esp + 4 + 16] /* width */
+ pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints.
+
+ // 2 pixel loop.
+ convertloop:
+ // pmovzxbd xmm0, dword ptr [eax] // BGRA pixel
+ // pmovzxbd xmm4, dword ptr [eax + 4] // BGRA pixel
+ movq xmm0, qword ptr [eax] // BGRABGRA
+ lea eax, [eax + 8]
+ punpcklbw xmm0, xmm3
+ movdqa xmm4, xmm0
+ punpcklwd xmm0, xmm3 // pixel 0
+ punpckhwd xmm4, xmm3 // pixel 1
+ cvtdq2ps xmm0, xmm0 // 4 floats
+ cvtdq2ps xmm4, xmm4
+ movdqa xmm1, xmm0 // X
+ movdqa xmm5, xmm4
+ mulps xmm0, [esi + 16] // C1 * X
+ mulps xmm4, [esi + 16]
+ addps xmm0, [esi] // result = C0 + C1 * X
+ addps xmm4, [esi]
+ movdqa xmm2, xmm1
+ movdqa xmm6, xmm5
+ mulps xmm2, xmm1 // X * X
+ mulps xmm6, xmm5
+ mulps xmm1, xmm2 // X * X * X
+ mulps xmm5, xmm6
+ mulps xmm2, [esi + 32] // C2 * X * X
+ mulps xmm6, [esi + 32]
+ mulps xmm1, [esi + 48] // C3 * X * X * X
+ mulps xmm5, [esi + 48]
+ addps xmm0, xmm2 // result += C2 * X * X
+ addps xmm4, xmm6
+ addps xmm0, xmm1 // result += C3 * X * X * X
+ addps xmm4, xmm5
+ cvttps2dq xmm0, xmm0
+ cvttps2dq xmm4, xmm4
+ packuswb xmm0, xmm4
+ packuswb xmm0, xmm0
+ movq qword ptr [edx], xmm0
+ lea edx, [edx + 8]
+ sub ecx, 2
+ jg convertloop
+ pop esi
+ ret
+ }
+}
+#endif // HAS_ARGBPOLYNOMIALROW_SSE2
+
+#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
+__declspec(naked) void ARGBPolynomialRow_AVX2(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const float* poly,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src_argb */
+ mov edx, [esp + 8] /* dst_argb */
+ mov ecx, [esp + 12] /* poly */
+ vbroadcastf128 ymm4, [ecx] // C0
+ vbroadcastf128 ymm5, [ecx + 16] // C1
+ vbroadcastf128 ymm6, [ecx + 32] // C2
+ vbroadcastf128 ymm7, [ecx + 48] // C3
+ mov ecx, [esp + 16] /* width */
+
+ // 2 pixel loop.
+ convertloop:
+ vpmovzxbd ymm0, qword ptr [eax] // 2 BGRA pixels
+ lea eax, [eax + 8]
+ vcvtdq2ps ymm0, ymm0 // X 8 floats
+ vmulps ymm2, ymm0, ymm0 // X * X
+ vmulps ymm3, ymm0, ymm7 // C3 * X
+ vfmadd132ps ymm0, ymm4, ymm5 // result = C0 + C1 * X
+ vfmadd231ps ymm0, ymm2, ymm6 // result += C2 * X * X
+ vfmadd231ps ymm0, ymm2, ymm3 // result += C3 * X * X * X
+ vcvttps2dq ymm0, ymm0
+ vpackusdw ymm0, ymm0, ymm0 // b0g0r0a0_00000000_b0g0r0a0_00000000
+ vpermq ymm0, ymm0, 0xd8 // b0g0r0a0_b0g0r0a0_00000000_00000000
+ vpackuswb xmm0, xmm0, xmm0 // bgrabgra_00000000_00000000_00000000
+ vmovq qword ptr [edx], xmm0
+ lea edx, [edx + 8]
+ sub ecx, 2
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_ARGBPOLYNOMIALROW_AVX2
+
+#ifdef HAS_HALFFLOATROW_SSE2
+static float kExpBias = 1.9259299444e-34f;
+__declspec(naked) void HalfFloatRow_SSE2(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src */
+ mov edx, [esp + 8] /* dst */
+ movd xmm4, dword ptr [esp + 12] /* scale */
+ mov ecx, [esp + 16] /* width */
+ mulss xmm4, kExpBias
+ pshufd xmm4, xmm4, 0
+ pxor xmm5, xmm5
+ sub edx, eax
+
+ // 8 pixel loop.
+ convertloop:
+ movdqu xmm2, xmmword ptr [eax] // 8 shorts
+ add eax, 16
+ movdqa xmm3, xmm2
+ punpcklwd xmm2, xmm5
+ cvtdq2ps xmm2, xmm2 // convert 8 ints to floats
+ punpckhwd xmm3, xmm5
+ cvtdq2ps xmm3, xmm3
+ mulps xmm2, xmm4
+ mulps xmm3, xmm4
+ psrld xmm2, 13
+ psrld xmm3, 13
+ packssdw xmm2, xmm3
+ movdqu [eax + edx - 16], xmm2
+ sub ecx, 8
+ jg convertloop
+ ret
+ }
+}
+#endif // HAS_HALFFLOATROW_SSE2
+
+#ifdef HAS_HALFFLOATROW_AVX2
+__declspec(naked) void HalfFloatRow_AVX2(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src */
+ mov edx, [esp + 8] /* dst */
+ movd xmm4, dword ptr [esp + 12] /* scale */
+ mov ecx, [esp + 16] /* width */
+
+ vmulss xmm4, xmm4, kExpBias
+ vbroadcastss ymm4, xmm4
+ vpxor ymm5, ymm5, ymm5
+ sub edx, eax
+
+ // 16 pixel loop.
+ convertloop:
+ vmovdqu ymm2, [eax] // 16 shorts
+ add eax, 32
+ vpunpckhwd ymm3, ymm2, ymm5 // convert 16 shorts to 16 ints
+ vpunpcklwd ymm2, ymm2, ymm5
+ vcvtdq2ps ymm3, ymm3 // convert 16 ints to floats
+ vcvtdq2ps ymm2, ymm2
+ vmulps ymm3, ymm3, ymm4 // scale to adjust exponent for 5 bit range.
+ vmulps ymm2, ymm2, ymm4
+ vpsrld ymm3, ymm3, 13 // float convert to 8 half floats truncate
+ vpsrld ymm2, ymm2, 13
+ vpackssdw ymm2, ymm2, ymm3
+ vmovdqu [eax + edx - 32], ymm2
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_HALFFLOATROW_AVX2
+
+#ifdef HAS_HALFFLOATROW_F16C
+__declspec(naked) void HalfFloatRow_F16C(const uint16_t* src,
+ uint16_t* dst,
+ float scale,
+ int width) {
+ __asm {
+ mov eax, [esp + 4] /* src */
+ mov edx, [esp + 8] /* dst */
+ vbroadcastss ymm4, [esp + 12] /* scale */
+ mov ecx, [esp + 16] /* width */
+ sub edx, eax
+
+ // 16 pixel loop.
+ convertloop:
+ vpmovzxwd ymm2, xmmword ptr [eax] // 8 shorts -> 8 ints
+ vpmovzxwd ymm3, xmmword ptr [eax + 16] // 8 more shorts
+ add eax, 32
+ vcvtdq2ps ymm2, ymm2 // convert 8 ints to floats
+ vcvtdq2ps ymm3, ymm3
+ vmulps ymm2, ymm2, ymm4 // scale to normalized range 0 to 1
+ vmulps ymm3, ymm3, ymm4
+ vcvtps2ph xmm2, ymm2, 3 // float convert to 8 half floats truncate
+ vcvtps2ph xmm3, ymm3, 3
+ vmovdqu [eax + edx + 32], xmm2
+ vmovdqu [eax + edx + 32 + 16], xmm3
+ sub ecx, 16
+ jg convertloop
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_HALFFLOATROW_F16C
+
+#ifdef HAS_ARGBCOLORTABLEROW_X86
+// Tranform ARGB pixels with color table.
+__declspec(naked) void ARGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] /* dst_argb */
+ mov esi, [esp + 4 + 8] /* table_argb */
+ mov ecx, [esp + 4 + 12] /* width */
+
+ // 1 pixel loop.
+ convertloop:
+ movzx edx, byte ptr [eax]
+ lea eax, [eax + 4]
+ movzx edx, byte ptr [esi + edx * 4]
+ mov byte ptr [eax - 4], dl
+ movzx edx, byte ptr [eax - 4 + 1]
+ movzx edx, byte ptr [esi + edx * 4 + 1]
+ mov byte ptr [eax - 4 + 1], dl
+ movzx edx, byte ptr [eax - 4 + 2]
+ movzx edx, byte ptr [esi + edx * 4 + 2]
+ mov byte ptr [eax - 4 + 2], dl
+ movzx edx, byte ptr [eax - 4 + 3]
+ movzx edx, byte ptr [esi + edx * 4 + 3]
+ mov byte ptr [eax - 4 + 3], dl
+ dec ecx
+ jg convertloop
+ pop esi
+ ret
+ }
+}
+#endif // HAS_ARGBCOLORTABLEROW_X86
+
+#ifdef HAS_RGBCOLORTABLEROW_X86
+// Tranform RGB pixels with color table.
+__declspec(naked) void RGBColorTableRow_X86(uint8_t* dst_argb,
+ const uint8_t* table_argb,
+ int width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] /* dst_argb */
+ mov esi, [esp + 4 + 8] /* table_argb */
+ mov ecx, [esp + 4 + 12] /* width */
+
+ // 1 pixel loop.
+ convertloop:
+ movzx edx, byte ptr [eax]
+ lea eax, [eax + 4]
+ movzx edx, byte ptr [esi + edx * 4]
+ mov byte ptr [eax - 4], dl
+ movzx edx, byte ptr [eax - 4 + 1]
+ movzx edx, byte ptr [esi + edx * 4 + 1]
+ mov byte ptr [eax - 4 + 1], dl
+ movzx edx, byte ptr [eax - 4 + 2]
+ movzx edx, byte ptr [esi + edx * 4 + 2]
+ mov byte ptr [eax - 4 + 2], dl
+ dec ecx
+ jg convertloop
+
+ pop esi
+ ret
+ }
+}
+#endif // HAS_RGBCOLORTABLEROW_X86
+
+#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
+// Tranform RGB pixels with luma table.
+__declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ const uint8_t* luma,
+ uint32_t lumacoeff) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] /* src_argb */
+ mov edi, [esp + 8 + 8] /* dst_argb */
+ mov ecx, [esp + 8 + 12] /* width */
+ movd xmm2, dword ptr [esp + 8 + 16] // luma table
+ movd xmm3, dword ptr [esp + 8 + 20] // lumacoeff
+ pshufd xmm2, xmm2, 0
+ pshufd xmm3, xmm3, 0
+ pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00
+ psllw xmm4, 8
+ pxor xmm5, xmm5
+
+ // 4 pixel loop.
+ convertloop:
+ movdqu xmm0, xmmword ptr [eax] // generate luma ptr
+ pmaddubsw xmm0, xmm3
+ phaddw xmm0, xmm0
+ pand xmm0, xmm4 // mask out low bits
+ punpcklwd xmm0, xmm5
+ paddd xmm0, xmm2 // add table base
+ movd esi, xmm0
+ pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32
+
+ movzx edx, byte ptr [eax]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi], dl
+ movzx edx, byte ptr [eax + 1]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 1], dl
+ movzx edx, byte ptr [eax + 2]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 2], dl
+ movzx edx, byte ptr [eax + 3] // copy alpha.
+ mov byte ptr [edi + 3], dl
+
+ movd esi, xmm0
+ pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32
+
+ movzx edx, byte ptr [eax + 4]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 4], dl
+ movzx edx, byte ptr [eax + 5]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 5], dl
+ movzx edx, byte ptr [eax + 6]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 6], dl
+ movzx edx, byte ptr [eax + 7] // copy alpha.
+ mov byte ptr [edi + 7], dl
+
+ movd esi, xmm0
+ pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32
+
+ movzx edx, byte ptr [eax + 8]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 8], dl
+ movzx edx, byte ptr [eax + 9]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 9], dl
+ movzx edx, byte ptr [eax + 10]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 10], dl
+ movzx edx, byte ptr [eax + 11] // copy alpha.
+ mov byte ptr [edi + 11], dl
+
+ movd esi, xmm0
+
+ movzx edx, byte ptr [eax + 12]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 12], dl
+ movzx edx, byte ptr [eax + 13]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 13], dl
+ movzx edx, byte ptr [eax + 14]
+ movzx edx, byte ptr [esi + edx]
+ mov byte ptr [edi + 14], dl
+ movzx edx, byte ptr [eax + 15] // copy alpha.
+ mov byte ptr [edi + 15], dl
+
+ lea eax, [eax + 16]
+ lea edi, [edi + 16]
+ sub ecx, 4
+ jg convertloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
+
+#endif // defined(_M_X64)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale.cc
new file mode 100644
index 0000000000..2cfa1c6cb1
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/scale.cc
@@ -0,0 +1,1741 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/scale.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h" // For CopyPlane
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+static __inline int Abs(int v) {
+ return v >= 0 ? v : -v;
+}
+
+#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
+
+// Scale plane, 1/2
+// This is an optimized version for scaling down a plane to 1/2 of
+// its original size.
+
+static void ScalePlaneDown2(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ enum FilterMode filtering) {
+ int y;
+ void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width) =
+ filtering == kFilterNone
+ ? ScaleRowDown2_C
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
+ : ScaleRowDown2Box_C);
+ int row_stride = src_stride << 1;
+ (void)src_width;
+ (void)src_height;
+ if (!filtering) {
+ src_ptr += src_stride; // Point to odd rows.
+ src_stride = 0;
+ }
+
+#if defined(HAS_SCALEROWDOWN2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowDown2 =
+ filtering == kFilterNone
+ ? ScaleRowDown2_Any_NEON
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
+ : ScaleRowDown2Box_Any_NEON);
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
+ : (filtering == kFilterLinear
+ ? ScaleRowDown2Linear_NEON
+ : ScaleRowDown2Box_NEON);
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ScaleRowDown2 =
+ filtering == kFilterNone
+ ? ScaleRowDown2_Any_SSSE3
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
+ : ScaleRowDown2Box_Any_SSSE3);
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown2 =
+ filtering == kFilterNone
+ ? ScaleRowDown2_SSSE3
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
+ : ScaleRowDown2Box_SSSE3);
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowDown2 =
+ filtering == kFilterNone
+ ? ScaleRowDown2_Any_AVX2
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
+ : ScaleRowDown2Box_Any_AVX2);
+ if (IS_ALIGNED(dst_width, 32)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
+ : (filtering == kFilterLinear
+ ? ScaleRowDown2Linear_AVX2
+ : ScaleRowDown2Box_AVX2);
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ScaleRowDown2 =
+ filtering == kFilterNone
+ ? ScaleRowDown2_Any_MSA
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
+ : ScaleRowDown2Box_Any_MSA);
+ if (IS_ALIGNED(dst_width, 32)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
+ : (filtering == kFilterLinear
+ ? ScaleRowDown2Linear_MSA
+ : ScaleRowDown2Box_MSA);
+ }
+ }
+#endif
+
+ if (filtering == kFilterLinear) {
+ src_stride = 0;
+ }
+ // TODO(fbarchard): Loop through source height to allow odd height.
+ for (y = 0; y < dst_height; ++y) {
+ ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
+ src_ptr += row_stride;
+ dst_ptr += dst_stride;
+ }
+}
+
+static void ScalePlaneDown2_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ enum FilterMode filtering) {
+ int y;
+ void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width) =
+ filtering == kFilterNone
+ ? ScaleRowDown2_16_C
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
+ : ScaleRowDown2Box_16_C);
+ int row_stride = src_stride << 1;
+ (void)src_width;
+ (void)src_height;
+ if (!filtering) {
+ src_ptr += src_stride; // Point to odd rows.
+ src_stride = 0;
+ }
+
+#if defined(HAS_SCALEROWDOWN2_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown2 =
+ filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON;
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown2 =
+ filtering == kFilterNone
+ ? ScaleRowDown2_16_SSE2
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
+ : ScaleRowDown2Box_16_SSE2);
+ }
+#endif
+
+ if (filtering == kFilterLinear) {
+ src_stride = 0;
+ }
+ // TODO(fbarchard): Loop through source height to allow odd height.
+ for (y = 0; y < dst_height; ++y) {
+ ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
+ src_ptr += row_stride;
+ dst_ptr += dst_stride;
+ }
+}
+
+// Scale plane, 1/4
+// This is an optimized version for scaling down a plane to 1/4 of
+// its original size.
+
+static void ScalePlaneDown4(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ enum FilterMode filtering) {
+ int y;
+ void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width) =
+ filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
+ int row_stride = src_stride << 2;
+ (void)src_width;
+ (void)src_height;
+ if (!filtering) {
+ src_ptr += src_stride * 2; // Point to row 2.
+ src_stride = 0;
+ }
+#if defined(HAS_SCALEROWDOWN4_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowDown4 =
+ filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN4_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ScaleRowDown4 =
+ filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN4_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowDown4 =
+ filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN4_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ScaleRowDown4 =
+ filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
+ }
+ }
+#endif
+
+ if (filtering == kFilterLinear) {
+ src_stride = 0;
+ }
+ for (y = 0; y < dst_height; ++y) {
+ ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
+ src_ptr += row_stride;
+ dst_ptr += dst_stride;
+ }
+}
+
+static void ScalePlaneDown4_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ enum FilterMode filtering) {
+ int y;
+ void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width) =
+ filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
+ int row_stride = src_stride << 2;
+ (void)src_width;
+ (void)src_height;
+ if (!filtering) {
+ src_ptr += src_stride * 2; // Point to row 2.
+ src_stride = 0;
+ }
+#if defined(HAS_SCALEROWDOWN4_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
+ ScaleRowDown4 =
+ filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN4_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
+ ScaleRowDown4 =
+ filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
+ }
+#endif
+
+ if (filtering == kFilterLinear) {
+ src_stride = 0;
+ }
+ for (y = 0; y < dst_height; ++y) {
+ ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
+ src_ptr += row_stride;
+ dst_ptr += dst_stride;
+ }
+}
+
+// Scale plane down, 3/4
+static void ScalePlaneDown34(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ enum FilterMode filtering) {
+ int y;
+ void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width);
+ void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width);
+ const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+ (void)src_width;
+ (void)src_height;
+ assert(dst_width % 3 == 0);
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_C;
+ ScaleRowDown34_1 = ScaleRowDown34_C;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
+ }
+#if defined(HAS_SCALEROWDOWN34_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
+ }
+ if (dst_width % 24 == 0) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_NEON;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
+ }
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN34_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_Any_MSA;
+ ScaleRowDown34_1 = ScaleRowDown34_Any_MSA;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA;
+ }
+ if (dst_width % 48 == 0) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_MSA;
+ ScaleRowDown34_1 = ScaleRowDown34_MSA;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA;
+ }
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN34_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
+ }
+ if (dst_width % 24 == 0) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
+ }
+ }
+ }
+#endif
+
+ for (y = 0; y < dst_height - 2; y += 3) {
+ ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride * 2;
+ dst_ptr += dst_stride;
+ }
+
+ // Remainder 1 or 2 rows with last row vertically unfiltered
+ if ((dst_height % 3) == 2) {
+ ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
+ } else if ((dst_height % 3) == 1) {
+ ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
+ }
+}
+
+static void ScalePlaneDown34_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ enum FilterMode filtering) {
+ int y;
+ void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width);
+ void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width);
+ const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+ (void)src_width;
+ (void)src_height;
+ assert(dst_width % 3 == 0);
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_16_C;
+ ScaleRowDown34_1 = ScaleRowDown34_16_C;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
+ }
+#if defined(HAS_SCALEROWDOWN34_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
+ }
+ }
+#endif
+
+ for (y = 0; y < dst_height - 2; y += 3) {
+ ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride * 2;
+ dst_ptr += dst_stride;
+ }
+
+ // Remainder 1 or 2 rows with last row vertically unfiltered
+ if ((dst_height % 3) == 2) {
+ ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
+ } else if ((dst_height % 3) == 1) {
+ ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
+ }
+}
+
+// Scale plane, 3/8
+// This is an optimized version for scaling down a plane to 3/8
+// of its original size.
+//
+// Uses box filter arranges like this
+// aaabbbcc -> abc
+// aaabbbcc def
+// aaabbbcc ghi
+// dddeeeff
+// dddeeeff
+// dddeeeff
+// ggghhhii
+// ggghhhii
+// Boxes are 3x3, 2x3, 3x2 and 2x2
+
+static void ScalePlaneDown38(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ enum FilterMode filtering) {
+ int y;
+ void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width);
+ void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width);
+ const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+ assert(dst_width % 3 == 0);
+ (void)src_width;
+ (void)src_height;
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_C;
+ ScaleRowDown38_2 = ScaleRowDown38_C;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
+ }
+
+#if defined(HAS_SCALEROWDOWN38_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
+ }
+ if (dst_width % 12 == 0) {
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_NEON;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
+ }
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN38_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
+ ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
+ }
+ if (dst_width % 12 == 0 && !filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
+ ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
+ }
+ if (dst_width % 6 == 0 && filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN38_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
+ ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
+ }
+ if (dst_width % 12 == 0) {
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_MSA;
+ ScaleRowDown38_2 = ScaleRowDown38_MSA;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
+ }
+ }
+ }
+#endif
+
+ for (y = 0; y < dst_height - 2; y += 3) {
+ ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride * 3;
+ dst_ptr += dst_stride;
+ ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride * 3;
+ dst_ptr += dst_stride;
+ ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride * 2;
+ dst_ptr += dst_stride;
+ }
+
+ // Remainder 1 or 2 rows with last row vertically unfiltered
+ if ((dst_height % 3) == 2) {
+ ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride * 3;
+ dst_ptr += dst_stride;
+ ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
+ } else if ((dst_height % 3) == 1) {
+ ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
+ }
+}
+
+static void ScalePlaneDown38_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ enum FilterMode filtering) {
+ int y;
+ void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width);
+ void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint16_t* dst_ptr, int dst_width);
+ const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+ (void)src_width;
+ (void)src_height;
+ assert(dst_width % 3 == 0);
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_16_C;
+ ScaleRowDown38_2 = ScaleRowDown38_16_C;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
+ }
+#if defined(HAS_SCALEROWDOWN38_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
+ ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
+ }
+ }
+#endif
+
+ for (y = 0; y < dst_height - 2; y += 3) {
+ ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride * 3;
+ dst_ptr += dst_stride;
+ ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride * 3;
+ dst_ptr += dst_stride;
+ ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride * 2;
+ dst_ptr += dst_stride;
+ }
+
+ // Remainder 1 or 2 rows with last row vertically unfiltered
+ if ((dst_height % 3) == 2) {
+ ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+ src_ptr += src_stride * 3;
+ dst_ptr += dst_stride;
+ ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
+ } else if ((dst_height % 3) == 1) {
+ ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
+ }
+}
+
+#define MIN1(x) ((x) < 1 ? 1 : (x))
+
+static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
+ uint32_t sum = 0u;
+ int x;
+ assert(iboxwidth > 0);
+ for (x = 0; x < iboxwidth; ++x) {
+ sum += src_ptr[x];
+ }
+ return sum;
+}
+
+static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
+ uint32_t sum = 0u;
+ int x;
+ assert(iboxwidth > 0);
+ for (x = 0; x < iboxwidth; ++x) {
+ sum += src_ptr[x];
+ }
+ return sum;
+}
+
+static void ScaleAddCols2_C(int dst_width,
+ int boxheight,
+ int x,
+ int dx,
+ const uint16_t* src_ptr,
+ uint8_t* dst_ptr) {
+ int i;
+ int scaletbl[2];
+ int minboxwidth = dx >> 16;
+ int boxwidth;
+ scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
+ scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
+ for (i = 0; i < dst_width; ++i) {
+ int ix = x >> 16;
+ x += dx;
+ boxwidth = MIN1((x >> 16) - ix);
+ *dst_ptr++ =
+ SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
+ 16;
+ }
+}
+
+static void ScaleAddCols2_16_C(int dst_width,
+ int boxheight,
+ int x,
+ int dx,
+ const uint32_t* src_ptr,
+ uint16_t* dst_ptr) {
+ int i;
+ int scaletbl[2];
+ int minboxwidth = dx >> 16;
+ int boxwidth;
+ scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
+ scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
+ for (i = 0; i < dst_width; ++i) {
+ int ix = x >> 16;
+ x += dx;
+ boxwidth = MIN1((x >> 16) - ix);
+ *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
+ scaletbl[boxwidth - minboxwidth] >>
+ 16;
+ }
+}
+
+static void ScaleAddCols0_C(int dst_width,
+ int boxheight,
+ int x,
+ int dx,
+ const uint16_t* src_ptr,
+ uint8_t* dst_ptr) {
+ int scaleval = 65536 / boxheight;
+ int i;
+ (void)dx;
+ src_ptr += (x >> 16);
+ for (i = 0; i < dst_width; ++i) {
+ *dst_ptr++ = src_ptr[i] * scaleval >> 16;
+ }
+}
+
+static void ScaleAddCols1_C(int dst_width,
+ int boxheight,
+ int x,
+ int dx,
+ const uint16_t* src_ptr,
+ uint8_t* dst_ptr) {
+ int boxwidth = MIN1(dx >> 16);
+ int scaleval = 65536 / (boxwidth * boxheight);
+ int i;
+ x >>= 16;
+ for (i = 0; i < dst_width; ++i) {
+ *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
+ x += boxwidth;
+ }
+}
+
+static void ScaleAddCols1_16_C(int dst_width,
+ int boxheight,
+ int x,
+ int dx,
+ const uint32_t* src_ptr,
+ uint16_t* dst_ptr) {
+ int boxwidth = MIN1(dx >> 16);
+ int scaleval = 65536 / (boxwidth * boxheight);
+ int i;
+ for (i = 0; i < dst_width; ++i) {
+ *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
+ x += boxwidth;
+ }
+}
+
+// Scale plane down to any dimensions, with interpolation.
+// (boxfilter).
+//
+// Same method as SimpleScale, which is fixed point, outputting
+// one pixel of destination using fixed point (16.16) to step
+// through source, sampling a box of pixel with simple
+// averaging.
+static void ScalePlaneBox(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
+ int j, k;
+ // Initial source x/y coordinate and step values as 16.16 fixed point.
+ int x = 0;
+ int y = 0;
+ int dx = 0;
+ int dy = 0;
+ const int max_y = (src_height << 16);
+ ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
+ &dx, &dy);
+ src_width = Abs(src_width);
+ {
+ // Allocate a row buffer of uint16_t.
+ align_buffer_64(row16, src_width * 2);
+ void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
+ const uint16_t* src_ptr, uint8_t* dst_ptr) =
+ (dx & 0xffff) ? ScaleAddCols2_C
+ : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
+ void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
+ int src_width) = ScaleAddRow_C;
+#if defined(HAS_SCALEADDROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleAddRow = ScaleAddRow_Any_SSE2;
+ if (IS_ALIGNED(src_width, 16)) {
+ ScaleAddRow = ScaleAddRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEADDROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleAddRow = ScaleAddRow_Any_AVX2;
+ if (IS_ALIGNED(src_width, 32)) {
+ ScaleAddRow = ScaleAddRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEADDROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleAddRow = ScaleAddRow_Any_NEON;
+ if (IS_ALIGNED(src_width, 16)) {
+ ScaleAddRow = ScaleAddRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEADDROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ScaleAddRow = ScaleAddRow_Any_MSA;
+ if (IS_ALIGNED(src_width, 16)) {
+ ScaleAddRow = ScaleAddRow_MSA;
+ }
+ }
+#endif
+
+ for (j = 0; j < dst_height; ++j) {
+ int boxheight;
+ int iy = y >> 16;
+ const uint8_t* src = src_ptr + iy * src_stride;
+ y += dy;
+ if (y > max_y) {
+ y = max_y;
+ }
+ boxheight = MIN1((y >> 16) - iy);
+ memset(row16, 0, src_width * 2);
+ for (k = 0; k < boxheight; ++k) {
+ ScaleAddRow(src, (uint16_t*)(row16), src_width);
+ src += src_stride;
+ }
+ ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
+ dst_ptr += dst_stride;
+ }
+ free_aligned_buffer_64(row16);
+ }
+}
+
+static void ScalePlaneBox_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
+ int j, k;
+ // Initial source x/y coordinate and step values as 16.16 fixed point.
+ int x = 0;
+ int y = 0;
+ int dx = 0;
+ int dy = 0;
+ const int max_y = (src_height << 16);
+ ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
+ &dx, &dy);
+ src_width = Abs(src_width);
+ {
+ // Allocate a row buffer of uint32_t.
+ align_buffer_64(row32, src_width * 4);
+ void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
+ const uint32_t* src_ptr, uint16_t* dst_ptr) =
+ (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
+ void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
+ int src_width) = ScaleAddRow_16_C;
+
+#if defined(HAS_SCALEADDROW_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
+ ScaleAddRow = ScaleAddRow_16_SSE2;
+ }
+#endif
+
+ for (j = 0; j < dst_height; ++j) {
+ int boxheight;
+ int iy = y >> 16;
+ const uint16_t* src = src_ptr + iy * src_stride;
+ y += dy;
+ if (y > max_y) {
+ y = max_y;
+ }
+ boxheight = MIN1((y >> 16) - iy);
+ memset(row32, 0, src_width * 4);
+ for (k = 0; k < boxheight; ++k) {
+ ScaleAddRow(src, (uint32_t*)(row32), src_width);
+ src += src_stride;
+ }
+ ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
+ dst_ptr += dst_stride;
+ }
+ free_aligned_buffer_64(row32);
+ }
+}
+
+// Scale plane down with bilinear interpolation.
+void ScalePlaneBilinearDown(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ enum FilterMode filtering) {
+ // Initial source x/y coordinate and step values as 16.16 fixed point.
+ int x = 0;
+ int y = 0;
+ int dx = 0;
+ int dy = 0;
+ // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
+ // Allocate a row buffer.
+ align_buffer_64(row, src_width);
+
+ const int max_y = (src_height - 1) << 16;
+ int j;
+ void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ int dst_width, int x, int dx) =
+ (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
+ &dx, &dy);
+ src_width = Abs(src_width);
+
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(src_width, 16)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(src_width, 32)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(src_width, 16)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ InterpolateRow = InterpolateRow_Any_MSA;
+ if (IS_ALIGNED(src_width, 32)) {
+ InterpolateRow = InterpolateRow_MSA;
+ }
+ }
+#endif
+
+#if defined(HAS_SCALEFILTERCOLS_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_SSSE3;
+ }
+#endif
+#if defined(HAS_SCALEFILTERCOLS_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleFilterCols = ScaleFilterCols_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEFILTERCOLS_MSA)
+ if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_Any_MSA;
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleFilterCols = ScaleFilterCols_MSA;
+ }
+ }
+#endif
+ if (y > max_y) {
+ y = max_y;
+ }
+
+ for (j = 0; j < dst_height; ++j) {
+ int yi = y >> 16;
+ const uint8_t* src = src_ptr + yi * src_stride;
+ if (filtering == kFilterLinear) {
+ ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
+ } else {
+ int yf = (y >> 8) & 255;
+ InterpolateRow(row, src, src_stride, src_width, yf);
+ ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
+ }
+ dst_ptr += dst_stride;
+ y += dy;
+ if (y > max_y) {
+ y = max_y;
+ }
+ }
+ free_aligned_buffer_64(row);
+}
+
+void ScalePlaneBilinearDown_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ enum FilterMode filtering) {
+ // Initial source x/y coordinate and step values as 16.16 fixed point.
+ int x = 0;
+ int y = 0;
+ int dx = 0;
+ int dy = 0;
+ // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
+ // Allocate a row buffer.
+ align_buffer_64(row, src_width * 2);
+
+ const int max_y = (src_height - 1) << 16;
+ int j;
+ void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ int dst_width, int x, int dx) =
+ (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
+ void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_16_C;
+ ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
+ &dx, &dy);
+ src_width = Abs(src_width);
+
+#if defined(HAS_INTERPOLATEROW_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ InterpolateRow = InterpolateRow_Any_16_SSE2;
+ if (IS_ALIGNED(src_width, 16)) {
+ InterpolateRow = InterpolateRow_16_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_16_SSSE3;
+ if (IS_ALIGNED(src_width, 16)) {
+ InterpolateRow = InterpolateRow_16_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_16_AVX2;
+ if (IS_ALIGNED(src_width, 32)) {
+ InterpolateRow = InterpolateRow_16_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_16_NEON;
+ if (IS_ALIGNED(src_width, 16)) {
+ InterpolateRow = InterpolateRow_16_NEON;
+ }
+ }
+#endif
+
+#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_16_SSSE3;
+ }
+#endif
+ if (y > max_y) {
+ y = max_y;
+ }
+
+ for (j = 0; j < dst_height; ++j) {
+ int yi = y >> 16;
+ const uint16_t* src = src_ptr + yi * src_stride;
+ if (filtering == kFilterLinear) {
+ ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
+ } else {
+ int yf = (y >> 8) & 255;
+ InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
+ ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
+ }
+ dst_ptr += dst_stride;
+ y += dy;
+ if (y > max_y) {
+ y = max_y;
+ }
+ }
+ free_aligned_buffer_64(row);
+}
+
+// Scale up down with bilinear interpolation.
+void ScalePlaneBilinearUp(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ enum FilterMode filtering) {
+ int j;
+ // Initial source x/y coordinate and step values as 16.16 fixed point.
+ int x = 0;
+ int y = 0;
+ int dx = 0;
+ int dy = 0;
+ const int max_y = (src_height - 1) << 16;
+ void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ int dst_width, int x, int dx) =
+ filtering ? ScaleFilterCols_C : ScaleCols_C;
+ ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
+ &dx, &dy);
+ src_width = Abs(src_width);
+
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(dst_width, 16)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(dst_width, 32)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(dst_width, 16)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+
+ if (filtering && src_width >= 32768) {
+ ScaleFilterCols = ScaleFilterCols64_C;
+ }
+#if defined(HAS_SCALEFILTERCOLS_SSSE3)
+ if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_SSSE3;
+ }
+#endif
+#if defined(HAS_SCALEFILTERCOLS_NEON)
+ if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleFilterCols = ScaleFilterCols_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEFILTERCOLS_MSA)
+ if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_Any_MSA;
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleFilterCols = ScaleFilterCols_MSA;
+ }
+ }
+#endif
+ if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
+ ScaleFilterCols = ScaleColsUp2_C;
+#if defined(HAS_SCALECOLS_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
+ ScaleFilterCols = ScaleColsUp2_SSE2;
+ }
+#endif
+ }
+
+ if (y > max_y) {
+ y = max_y;
+ }
+ {
+ int yi = y >> 16;
+ const uint8_t* src = src_ptr + yi * src_stride;
+
+ // Allocate 2 row buffers.
+ const int kRowSize = (dst_width + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+
+ uint8_t* rowptr = row;
+ int rowstride = kRowSize;
+ int lasty = yi;
+
+ ScaleFilterCols(rowptr, src, dst_width, x, dx);
+ if (src_height > 1) {
+ src += src_stride;
+ }
+ ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
+ src += src_stride;
+
+ for (j = 0; j < dst_height; ++j) {
+ yi = y >> 16;
+ if (yi != lasty) {
+ if (y > max_y) {
+ y = max_y;
+ yi = y >> 16;
+ src = src_ptr + yi * src_stride;
+ }
+ if (yi != lasty) {
+ ScaleFilterCols(rowptr, src, dst_width, x, dx);
+ rowptr += rowstride;
+ rowstride = -rowstride;
+ lasty = yi;
+ src += src_stride;
+ }
+ }
+ if (filtering == kFilterLinear) {
+ InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
+ } else {
+ int yf = (y >> 8) & 255;
+ InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
+ }
+ dst_ptr += dst_stride;
+ y += dy;
+ }
+ free_aligned_buffer_64(row);
+ }
+}
+
+void ScalePlaneBilinearUp_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ enum FilterMode filtering) {
+ int j;
+ // Initial source x/y coordinate and step values as 16.16 fixed point.
+ int x = 0;
+ int y = 0;
+ int dx = 0;
+ int dy = 0;
+ const int max_y = (src_height - 1) << 16;
+ void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_16_C;
+ void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ int dst_width, int x, int dx) =
+ filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
+ ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
+ &dx, &dy);
+ src_width = Abs(src_width);
+
+#if defined(HAS_INTERPOLATEROW_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ InterpolateRow = InterpolateRow_Any_16_SSE2;
+ if (IS_ALIGNED(dst_width, 16)) {
+ InterpolateRow = InterpolateRow_16_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_16_SSSE3;
+ if (IS_ALIGNED(dst_width, 16)) {
+ InterpolateRow = InterpolateRow_16_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_16_AVX2;
+ if (IS_ALIGNED(dst_width, 32)) {
+ InterpolateRow = InterpolateRow_16_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_16_NEON;
+ if (IS_ALIGNED(dst_width, 16)) {
+ InterpolateRow = InterpolateRow_16_NEON;
+ }
+ }
+#endif
+
+ if (filtering && src_width >= 32768) {
+ ScaleFilterCols = ScaleFilterCols64_16_C;
+ }
+#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
+ if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_16_SSSE3;
+ }
+#endif
+ if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
+ ScaleFilterCols = ScaleColsUp2_16_C;
+#if defined(HAS_SCALECOLS_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
+ ScaleFilterCols = ScaleColsUp2_16_SSE2;
+ }
+#endif
+ }
+
+ if (y > max_y) {
+ y = max_y;
+ }
+ {
+ int yi = y >> 16;
+ const uint16_t* src = src_ptr + yi * src_stride;
+
+ // Allocate 2 row buffers.
+ const int kRowSize = (dst_width + 31) & ~31;
+ align_buffer_64(row, kRowSize * 4);
+
+ uint16_t* rowptr = (uint16_t*)row;
+ int rowstride = kRowSize;
+ int lasty = yi;
+
+ ScaleFilterCols(rowptr, src, dst_width, x, dx);
+ if (src_height > 1) {
+ src += src_stride;
+ }
+ ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
+ src += src_stride;
+
+ for (j = 0; j < dst_height; ++j) {
+ yi = y >> 16;
+ if (yi != lasty) {
+ if (y > max_y) {
+ y = max_y;
+ yi = y >> 16;
+ src = src_ptr + yi * src_stride;
+ }
+ if (yi != lasty) {
+ ScaleFilterCols(rowptr, src, dst_width, x, dx);
+ rowptr += rowstride;
+ rowstride = -rowstride;
+ lasty = yi;
+ src += src_stride;
+ }
+ }
+ if (filtering == kFilterLinear) {
+ InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
+ } else {
+ int yf = (y >> 8) & 255;
+ InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
+ }
+ dst_ptr += dst_stride;
+ y += dy;
+ }
+ free_aligned_buffer_64(row);
+ }
+}
+
+// Scale Plane to/from any dimensions, without interpolation.
+// Fixed point math is used for performance: The upper 16 bits
+// of x and dx is the integer part of the source position and
+// the lower 16 bits are the fixed decimal part.
+
+static void ScalePlaneSimple(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
+ int i;
+ void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
+ int x, int dx) = ScaleCols_C;
+ // Initial source x/y coordinate and step values as 16.16 fixed point.
+ int x = 0;
+ int y = 0;
+ int dx = 0;
+ int dy = 0;
+ ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
+ &dx, &dy);
+ src_width = Abs(src_width);
+
+ if (src_width * 2 == dst_width && x < 0x8000) {
+ ScaleCols = ScaleColsUp2_C;
+#if defined(HAS_SCALECOLS_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
+ ScaleCols = ScaleColsUp2_SSE2;
+ }
+#endif
+ }
+
+ for (i = 0; i < dst_height; ++i) {
+ ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
+ dst_ptr += dst_stride;
+ y += dy;
+ }
+}
+
+static void ScalePlaneSimple_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
+ int i;
+ void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width,
+ int x, int dx) = ScaleCols_16_C;
+ // Initial source x/y coordinate and step values as 16.16 fixed point.
+ int x = 0;
+ int y = 0;
+ int dx = 0;
+ int dy = 0;
+ ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
+ &dx, &dy);
+ src_width = Abs(src_width);
+
+ if (src_width * 2 == dst_width && x < 0x8000) {
+ ScaleCols = ScaleColsUp2_16_C;
+#if defined(HAS_SCALECOLS_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
+ ScaleCols = ScaleColsUp2_16_SSE2;
+ }
+#endif
+ }
+
+ for (i = 0; i < dst_height; ++i) {
+ ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
+ dst_ptr += dst_stride;
+ y += dy;
+ }
+}
+
+// Scale a plane.
+// This function dispatches to a specialized scaler based on scale factor.
+
+LIBYUV_API
+void ScalePlane(const uint8_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint8_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering) {
+ // Simplify filtering when possible.
+ filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
+ filtering);
+
+ // Negative height means invert the image.
+ if (src_height < 0) {
+ src_height = -src_height;
+ src = src + (src_height - 1) * src_stride;
+ src_stride = -src_stride;
+ }
+
+ // Use specialized scales to improve performance for common resolutions.
+ // For example, all the 1/2 scalings will use ScalePlaneDown2()
+ if (dst_width == src_width && dst_height == src_height) {
+ // Straight copy.
+ CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
+ return;
+ }
+ if (dst_width == src_width && filtering != kFilterBox) {
+ int dy = FixedDiv(src_height, dst_height);
+ // Arbitrary scale vertically, but unscaled horizontally.
+ ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
+ dst_stride, src, dst, 0, 0, dy, 1, filtering);
+ return;
+ }
+ if (dst_width <= Abs(src_width) && dst_height <= src_height) {
+ // Scale down.
+ if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
+ // optimized, 3/4
+ ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
+ dst_stride, src, dst, filtering);
+ return;
+ }
+ if (2 * dst_width == src_width && 2 * dst_height == src_height) {
+ // optimized, 1/2
+ ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
+ dst_stride, src, dst, filtering);
+ return;
+ }
+ // 3/8 rounded up for odd sized chroma height.
+ if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
+ // optimized, 3/8
+ ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
+ dst_stride, src, dst, filtering);
+ return;
+ }
+ if (4 * dst_width == src_width && 4 * dst_height == src_height &&
+ (filtering == kFilterBox || filtering == kFilterNone)) {
+ // optimized, 1/4
+ ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
+ dst_stride, src, dst, filtering);
+ return;
+ }
+ }
+ if (filtering == kFilterBox && dst_height * 2 < src_height) {
+ ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
+ dst_stride, src, dst);
+ return;
+ }
+ if (filtering && dst_height > src_height) {
+ ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
+ return;
+ }
+ if (filtering) {
+ ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
+ return;
+ }
+ ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
+ dst_stride, src, dst);
+}
+
+LIBYUV_API
+void ScalePlane_16(const uint16_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint16_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering) {
+ // Simplify filtering when possible.
+ filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
+ filtering);
+
+ // Negative height means invert the image.
+ if (src_height < 0) {
+ src_height = -src_height;
+ src = src + (src_height - 1) * src_stride;
+ src_stride = -src_stride;
+ }
+
+ // Use specialized scales to improve performance for common resolutions.
+ // For example, all the 1/2 scalings will use ScalePlaneDown2()
+ if (dst_width == src_width && dst_height == src_height) {
+ // Straight copy.
+ CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
+ return;
+ }
+ if (dst_width == src_width && filtering != kFilterBox) {
+ int dy = FixedDiv(src_height, dst_height);
+ // Arbitrary scale vertically, but unscaled vertically.
+ ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
+ dst_stride, src, dst, 0, 0, dy, 1, filtering);
+ return;
+ }
+ if (dst_width <= Abs(src_width) && dst_height <= src_height) {
+ // Scale down.
+ if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
+ // optimized, 3/4
+ ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
+ return;
+ }
+ if (2 * dst_width == src_width && 2 * dst_height == src_height) {
+ // optimized, 1/2
+ ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
+ return;
+ }
+ // 3/8 rounded up for odd sized chroma height.
+ if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
+ // optimized, 3/8
+ ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
+ return;
+ }
+ if (4 * dst_width == src_width && 4 * dst_height == src_height &&
+ (filtering == kFilterBox || filtering == kFilterNone)) {
+ // optimized, 1/4
+ ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
+ return;
+ }
+ }
+ if (filtering == kFilterBox && dst_height * 2 < src_height) {
+ ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
+ dst_stride, src, dst);
+ return;
+ }
+ if (filtering && dst_height > src_height) {
+ ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
+ return;
+ }
+ if (filtering) {
+ ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
+ return;
+ }
+ ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
+ dst_stride, src, dst);
+}
+
+// Scale an I420 image.
+// This function in turn calls a scaling function for each plane.
+
+LIBYUV_API
+int I420Scale(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ int src_width,
+ int src_height,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering) {
+ int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
+ int src_halfheight = SUBSAMPLE(src_height, 1, 1);
+ int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+ int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+ if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
+ src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
+ dst_width <= 0 || dst_height <= 0) {
+ return -1;
+ }
+
+ ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
+ dst_width, dst_height, filtering);
+ ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+ dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+ ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+ dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+ return 0;
+}
+
+LIBYUV_API
+int I420Scale_16(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ int src_width,
+ int src_height,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering) {
+ int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
+ int src_halfheight = SUBSAMPLE(src_height, 1, 1);
+ int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+ int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+ if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
+ src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
+ dst_width <= 0 || dst_height <= 0) {
+ return -1;
+ }
+
+ ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
+ dst_width, dst_height, filtering);
+ ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+ dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+ ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+ dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+ return 0;
+}
+
+// Deprecated api
+LIBYUV_API
+int Scale(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ int src_stride_y,
+ int src_stride_u,
+ int src_stride_v,
+ int src_width,
+ int src_height,
+ uint8_t* dst_y,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int dst_stride_y,
+ int dst_stride_u,
+ int dst_stride_v,
+ int dst_width,
+ int dst_height,
+ LIBYUV_BOOL interpolate) {
+ return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, src_width, src_height, dst_y, dst_stride_y,
+ dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
+ dst_height, interpolate ? kFilterBox : kFilterNone);
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_any.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_any.cc
new file mode 100644
index 0000000000..53ad136404
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_any.cc
@@ -0,0 +1,464 @@
+/*
+ * Copyright 2015 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/scale.h"
+#include "libyuv/scale_row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
+#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
+ void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \
+ int dx) { \
+ int r = dst_width & MASK; \
+ int n = dst_width & ~MASK; \
+ if (n > 0) { \
+ TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
+ } \
+ TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \
+ }
+
+#ifdef HAS_SCALEFILTERCOLS_NEON
+CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
+#endif
+#ifdef HAS_SCALEFILTERCOLS_MSA
+CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15)
+#endif
+#ifdef HAS_SCALEARGBCOLS_NEON
+CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
+#endif
+#ifdef HAS_SCALEARGBCOLS_MSA
+CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3)
+#endif
+#ifdef HAS_SCALEARGBFILTERCOLS_NEON
+CANY(ScaleARGBFilterCols_Any_NEON,
+ ScaleARGBFilterCols_NEON,
+ ScaleARGBFilterCols_C,
+ 4,
+ 3)
+#endif
+#ifdef HAS_SCALEARGBFILTERCOLS_MSA
+CANY(ScaleARGBFilterCols_Any_MSA,
+ ScaleARGBFilterCols_MSA,
+ ScaleARGBFilterCols_C,
+ 4,
+ 7)
+#endif
+#undef CANY
+
+// Fixed scale down.
+// Mask may be non-power of 2, so use MOD
+#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
+ int dst_width) { \
+ int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \
+ int n = dst_width - r; \
+ if (n > 0) { \
+ SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
+ } \
+ SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
+ dst_ptr + n * BPP, r); \
+ }
+
+// Fixed scale down for odd source width. Used by I420Blend subsampling.
+// Since dst_width is (width + 1) / 2, this function scales one less pixel
+// and copies the last pixel.
+#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
+ int dst_width) { \
+ int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \
+ int n = (dst_width - 1) - r; \
+ if (n > 0) { \
+ SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
+ } \
+ SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
+ dst_ptr + n * BPP, r + 1); \
+ }
+
+#ifdef HAS_SCALEROWDOWN2_SSSE3
+SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
+SDANY(ScaleRowDown2Linear_Any_SSSE3,
+ ScaleRowDown2Linear_SSSE3,
+ ScaleRowDown2Linear_C,
+ 2,
+ 1,
+ 15)
+SDANY(ScaleRowDown2Box_Any_SSSE3,
+ ScaleRowDown2Box_SSSE3,
+ ScaleRowDown2Box_C,
+ 2,
+ 1,
+ 15)
+SDODD(ScaleRowDown2Box_Odd_SSSE3,
+ ScaleRowDown2Box_SSSE3,
+ ScaleRowDown2Box_Odd_C,
+ 2,
+ 1,
+ 15)
+#endif
+#ifdef HAS_SCALEROWDOWN2_AVX2
+SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
+SDANY(ScaleRowDown2Linear_Any_AVX2,
+ ScaleRowDown2Linear_AVX2,
+ ScaleRowDown2Linear_C,
+ 2,
+ 1,
+ 31)
+SDANY(ScaleRowDown2Box_Any_AVX2,
+ ScaleRowDown2Box_AVX2,
+ ScaleRowDown2Box_C,
+ 2,
+ 1,
+ 31)
+SDODD(ScaleRowDown2Box_Odd_AVX2,
+ ScaleRowDown2Box_AVX2,
+ ScaleRowDown2Box_Odd_C,
+ 2,
+ 1,
+ 31)
+#endif
+#ifdef HAS_SCALEROWDOWN2_NEON
+SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
+SDANY(ScaleRowDown2Linear_Any_NEON,
+ ScaleRowDown2Linear_NEON,
+ ScaleRowDown2Linear_C,
+ 2,
+ 1,
+ 15)
+SDANY(ScaleRowDown2Box_Any_NEON,
+ ScaleRowDown2Box_NEON,
+ ScaleRowDown2Box_C,
+ 2,
+ 1,
+ 15)
+SDODD(ScaleRowDown2Box_Odd_NEON,
+ ScaleRowDown2Box_NEON,
+ ScaleRowDown2Box_Odd_C,
+ 2,
+ 1,
+ 15)
+#endif
+#ifdef HAS_SCALEROWDOWN2_MSA
+SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31)
+SDANY(ScaleRowDown2Linear_Any_MSA,
+ ScaleRowDown2Linear_MSA,
+ ScaleRowDown2Linear_C,
+ 2,
+ 1,
+ 31)
+SDANY(ScaleRowDown2Box_Any_MSA,
+ ScaleRowDown2Box_MSA,
+ ScaleRowDown2Box_C,
+ 2,
+ 1,
+ 31)
+#endif
+#ifdef HAS_SCALEROWDOWN4_SSSE3
+SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
+SDANY(ScaleRowDown4Box_Any_SSSE3,
+ ScaleRowDown4Box_SSSE3,
+ ScaleRowDown4Box_C,
+ 4,
+ 1,
+ 7)
+#endif
+#ifdef HAS_SCALEROWDOWN4_AVX2
+SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
+SDANY(ScaleRowDown4Box_Any_AVX2,
+ ScaleRowDown4Box_AVX2,
+ ScaleRowDown4Box_C,
+ 4,
+ 1,
+ 15)
+#endif
+#ifdef HAS_SCALEROWDOWN4_NEON
+SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
+SDANY(ScaleRowDown4Box_Any_NEON,
+ ScaleRowDown4Box_NEON,
+ ScaleRowDown4Box_C,
+ 4,
+ 1,
+ 7)
+#endif
+#ifdef HAS_SCALEROWDOWN4_MSA
+SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15)
+SDANY(ScaleRowDown4Box_Any_MSA,
+ ScaleRowDown4Box_MSA,
+ ScaleRowDown4Box_C,
+ 4,
+ 1,
+ 15)
+#endif
+#ifdef HAS_SCALEROWDOWN34_SSSE3
+SDANY(ScaleRowDown34_Any_SSSE3,
+ ScaleRowDown34_SSSE3,
+ ScaleRowDown34_C,
+ 4 / 3,
+ 1,
+ 23)
+SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
+ ScaleRowDown34_0_Box_SSSE3,
+ ScaleRowDown34_0_Box_C,
+ 4 / 3,
+ 1,
+ 23)
+SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
+ ScaleRowDown34_1_Box_SSSE3,
+ ScaleRowDown34_1_Box_C,
+ 4 / 3,
+ 1,
+ 23)
+#endif
+#ifdef HAS_SCALEROWDOWN34_NEON
+SDANY(ScaleRowDown34_Any_NEON,
+ ScaleRowDown34_NEON,
+ ScaleRowDown34_C,
+ 4 / 3,
+ 1,
+ 23)
+SDANY(ScaleRowDown34_0_Box_Any_NEON,
+ ScaleRowDown34_0_Box_NEON,
+ ScaleRowDown34_0_Box_C,
+ 4 / 3,
+ 1,
+ 23)
+SDANY(ScaleRowDown34_1_Box_Any_NEON,
+ ScaleRowDown34_1_Box_NEON,
+ ScaleRowDown34_1_Box_C,
+ 4 / 3,
+ 1,
+ 23)
+#endif
+#ifdef HAS_SCALEROWDOWN34_MSA
+SDANY(ScaleRowDown34_Any_MSA,
+ ScaleRowDown34_MSA,
+ ScaleRowDown34_C,
+ 4 / 3,
+ 1,
+ 47)
+SDANY(ScaleRowDown34_0_Box_Any_MSA,
+ ScaleRowDown34_0_Box_MSA,
+ ScaleRowDown34_0_Box_C,
+ 4 / 3,
+ 1,
+ 47)
+SDANY(ScaleRowDown34_1_Box_Any_MSA,
+ ScaleRowDown34_1_Box_MSA,
+ ScaleRowDown34_1_Box_C,
+ 4 / 3,
+ 1,
+ 47)
+#endif
+#ifdef HAS_SCALEROWDOWN38_SSSE3
+SDANY(ScaleRowDown38_Any_SSSE3,
+ ScaleRowDown38_SSSE3,
+ ScaleRowDown38_C,
+ 8 / 3,
+ 1,
+ 11)
+SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
+ ScaleRowDown38_3_Box_SSSE3,
+ ScaleRowDown38_3_Box_C,
+ 8 / 3,
+ 1,
+ 5)
+SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
+ ScaleRowDown38_2_Box_SSSE3,
+ ScaleRowDown38_2_Box_C,
+ 8 / 3,
+ 1,
+ 5)
+#endif
+#ifdef HAS_SCALEROWDOWN38_NEON
+SDANY(ScaleRowDown38_Any_NEON,
+ ScaleRowDown38_NEON,
+ ScaleRowDown38_C,
+ 8 / 3,
+ 1,
+ 11)
+SDANY(ScaleRowDown38_3_Box_Any_NEON,
+ ScaleRowDown38_3_Box_NEON,
+ ScaleRowDown38_3_Box_C,
+ 8 / 3,
+ 1,
+ 11)
+SDANY(ScaleRowDown38_2_Box_Any_NEON,
+ ScaleRowDown38_2_Box_NEON,
+ ScaleRowDown38_2_Box_C,
+ 8 / 3,
+ 1,
+ 11)
+#endif
+#ifdef HAS_SCALEROWDOWN38_MSA
+SDANY(ScaleRowDown38_Any_MSA,
+ ScaleRowDown38_MSA,
+ ScaleRowDown38_C,
+ 8 / 3,
+ 1,
+ 11)
+SDANY(ScaleRowDown38_3_Box_Any_MSA,
+ ScaleRowDown38_3_Box_MSA,
+ ScaleRowDown38_3_Box_C,
+ 8 / 3,
+ 1,
+ 11)
+SDANY(ScaleRowDown38_2_Box_Any_MSA,
+ ScaleRowDown38_2_Box_MSA,
+ ScaleRowDown38_2_Box_C,
+ 8 / 3,
+ 1,
+ 11)
+#endif
+
+#ifdef HAS_SCALEARGBROWDOWN2_SSE2
+SDANY(ScaleARGBRowDown2_Any_SSE2,
+ ScaleARGBRowDown2_SSE2,
+ ScaleARGBRowDown2_C,
+ 2,
+ 4,
+ 3)
+SDANY(ScaleARGBRowDown2Linear_Any_SSE2,
+ ScaleARGBRowDown2Linear_SSE2,
+ ScaleARGBRowDown2Linear_C,
+ 2,
+ 4,
+ 3)
+SDANY(ScaleARGBRowDown2Box_Any_SSE2,
+ ScaleARGBRowDown2Box_SSE2,
+ ScaleARGBRowDown2Box_C,
+ 2,
+ 4,
+ 3)
+#endif
+#ifdef HAS_SCALEARGBROWDOWN2_NEON
+SDANY(ScaleARGBRowDown2_Any_NEON,
+ ScaleARGBRowDown2_NEON,
+ ScaleARGBRowDown2_C,
+ 2,
+ 4,
+ 7)
+SDANY(ScaleARGBRowDown2Linear_Any_NEON,
+ ScaleARGBRowDown2Linear_NEON,
+ ScaleARGBRowDown2Linear_C,
+ 2,
+ 4,
+ 7)
+SDANY(ScaleARGBRowDown2Box_Any_NEON,
+ ScaleARGBRowDown2Box_NEON,
+ ScaleARGBRowDown2Box_C,
+ 2,
+ 4,
+ 7)
+#endif
+#ifdef HAS_SCALEARGBROWDOWN2_MSA
+SDANY(ScaleARGBRowDown2_Any_MSA,
+ ScaleARGBRowDown2_MSA,
+ ScaleARGBRowDown2_C,
+ 2,
+ 4,
+ 3)
+SDANY(ScaleARGBRowDown2Linear_Any_MSA,
+ ScaleARGBRowDown2Linear_MSA,
+ ScaleARGBRowDown2Linear_C,
+ 2,
+ 4,
+ 3)
+SDANY(ScaleARGBRowDown2Box_Any_MSA,
+ ScaleARGBRowDown2Box_MSA,
+ ScaleARGBRowDown2Box_C,
+ 2,
+ 4,
+ 3)
+#endif
+#undef SDANY
+
+// Scale down by even scale factor.
+#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \
+ uint8_t* dst_ptr, int dst_width) { \
+ int r = dst_width & MASK; \
+ int n = dst_width & ~MASK; \
+ if (n > 0) { \
+ SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
+ } \
+ SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \
+ dst_ptr + n * BPP, r); \
+ }
+
+#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
+SDAANY(ScaleARGBRowDownEven_Any_SSE2,
+ ScaleARGBRowDownEven_SSE2,
+ ScaleARGBRowDownEven_C,
+ 4,
+ 3)
+SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2,
+ ScaleARGBRowDownEvenBox_SSE2,
+ ScaleARGBRowDownEvenBox_C,
+ 4,
+ 3)
+#endif
+#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
+SDAANY(ScaleARGBRowDownEven_Any_NEON,
+ ScaleARGBRowDownEven_NEON,
+ ScaleARGBRowDownEven_C,
+ 4,
+ 3)
+SDAANY(ScaleARGBRowDownEvenBox_Any_NEON,
+ ScaleARGBRowDownEvenBox_NEON,
+ ScaleARGBRowDownEvenBox_C,
+ 4,
+ 3)
+#endif
+#ifdef HAS_SCALEARGBROWDOWNEVEN_MSA
+SDAANY(ScaleARGBRowDownEven_Any_MSA,
+ ScaleARGBRowDownEven_MSA,
+ ScaleARGBRowDownEven_C,
+ 4,
+ 3)
+SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
+ ScaleARGBRowDownEvenBox_MSA,
+ ScaleARGBRowDownEvenBox_C,
+ 4,
+ 3)
+#endif
+
+// Add rows box filter scale down.
+#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \
+ int n = src_width & ~MASK; \
+ if (n > 0) { \
+ SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
+ }
+
+#ifdef HAS_SCALEADDROW_SSE2
+SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
+#endif
+#ifdef HAS_SCALEADDROW_AVX2
+SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
+#endif
+#ifdef HAS_SCALEADDROW_NEON
+SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
+#endif
+#ifdef HAS_SCALEADDROW_MSA
+SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
+#endif
+#undef SAANY
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_argb.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_argb.cc
new file mode 100644
index 0000000000..53a22e8b41
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_argb.cc
@@ -0,0 +1,1010 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/scale.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h" // For CopyARGB
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+static __inline int Abs(int v) {
+ return v >= 0 ? v : -v;
+}
+
+// ScaleARGB ARGB, 1/2
+// This is an optimized version for scaling down a ARGB to 1/2 of
+// its original size.
+static void ScaleARGBDown2(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy,
+ enum FilterMode filtering) {
+ int j;
+ int row_stride = src_stride * (dy >> 16);
+ void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
+ uint8_t* dst_argb, int dst_width) =
+ filtering == kFilterNone
+ ? ScaleARGBRowDown2_C
+ : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
+ : ScaleARGBRowDown2Box_C);
+ (void)src_width;
+ (void)src_height;
+ (void)dx;
+ assert(dx == 65536 * 2); // Test scale factor of 2.
+ assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
+ // Advance to odd row, even column.
+ if (filtering == kFilterBilinear) {
+ src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
+ } else {
+ src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
+ }
+
+#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleARGBRowDown2 =
+ filtering == kFilterNone
+ ? ScaleARGBRowDown2_Any_SSE2
+ : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
+ : ScaleARGBRowDown2Box_Any_SSE2);
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBRowDown2 =
+ filtering == kFilterNone
+ ? ScaleARGBRowDown2_SSE2
+ : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
+ : ScaleARGBRowDown2Box_SSE2);
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBROWDOWN2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBRowDown2 =
+ filtering == kFilterNone
+ ? ScaleARGBRowDown2_Any_NEON
+ : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
+ : ScaleARGBRowDown2Box_Any_NEON);
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBRowDown2 =
+ filtering == kFilterNone
+ ? ScaleARGBRowDown2_NEON
+ : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
+ : ScaleARGBRowDown2Box_NEON);
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBROWDOWN2_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ScaleARGBRowDown2 =
+ filtering == kFilterNone
+ ? ScaleARGBRowDown2_Any_MSA
+ : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA
+ : ScaleARGBRowDown2Box_Any_MSA);
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBRowDown2 =
+ filtering == kFilterNone
+ ? ScaleARGBRowDown2_MSA
+ : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA
+ : ScaleARGBRowDown2Box_MSA);
+ }
+ }
+#endif
+
+ if (filtering == kFilterLinear) {
+ src_stride = 0;
+ }
+ for (j = 0; j < dst_height; ++j) {
+ ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
+ src_argb += row_stride;
+ dst_argb += dst_stride;
+ }
+}
+
+// ScaleARGB ARGB, 1/4
+// This is an optimized version for scaling down a ARGB to 1/4 of
+// its original size.
+static void ScaleARGBDown4Box(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy) {
+ int j;
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+ int row_stride = src_stride * (dy >> 16);
+ void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
+ uint8_t* dst_argb, int dst_width) =
+ ScaleARGBRowDown2Box_C;
+ // Advance to odd row, even column.
+ src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
+ (void)src_width;
+ (void)src_height;
+ (void)dx;
+ assert(dx == 65536 * 4); // Test scale factor of 4.
+ assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
+#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBROWDOWN2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
+ }
+ }
+#endif
+
+ for (j = 0; j < dst_height; ++j) {
+ ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
+ ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
+ dst_width * 2);
+ ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
+ src_argb += row_stride;
+ dst_argb += dst_stride;
+ }
+ free_aligned_buffer_64(row);
+}
+
+// ScaleARGB ARGB Even
+// This is an optimized version for scaling down a ARGB to even
+// multiple of its original size.
+static void ScaleARGBDownEven(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy,
+ enum FilterMode filtering) {
+ int j;
+ int col_step = dx >> 16;
+ int row_stride = (dy >> 16) * src_stride;
+ void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
+ int src_step, uint8_t* dst_argb, int dst_width) =
+ filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
+ (void)src_width;
+ (void)src_height;
+ assert(IS_ALIGNED(src_width, 2));
+ assert(IS_ALIGNED(src_height, 2));
+ src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
+#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
+ : ScaleARGBRowDownEven_Any_SSE2;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBRowDownEven =
+ filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
+ : ScaleARGBRowDownEven_Any_NEON;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBRowDownEven =
+ filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
+ : ScaleARGBRowDownEven_Any_MSA;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBRowDownEven =
+ filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA;
+ }
+ }
+#endif
+
+ if (filtering == kFilterLinear) {
+ src_stride = 0;
+ }
+ for (j = 0; j < dst_height; ++j) {
+ ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
+ src_argb += row_stride;
+ dst_argb += dst_stride;
+ }
+}
+
+// Scale ARGB down with bilinear interpolation.
+static void ScaleARGBBilinearDown(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy,
+ enum FilterMode filtering) {
+ int j;
+ void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ int dst_width, int x, int dx) =
+ (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
+ int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
+ int64_t xl = (dx >= 0) ? x : xlast;
+ int64_t xr = (dx >= 0) ? xlast : x;
+ int clip_src_width;
+ xl = (xl >> 16) & ~3; // Left edge aligned.
+ xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
+ xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
+ if (xr > src_width) {
+ xr = src_width;
+ }
+ clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
+ src_argb += xl * 4;
+ x -= (int)(xl << 16);
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(clip_src_width, 16)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(clip_src_width, 32)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(clip_src_width, 16)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ InterpolateRow = InterpolateRow_Any_MSA;
+ if (IS_ALIGNED(clip_src_width, 32)) {
+ InterpolateRow = InterpolateRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
+ }
+#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
+ }
+ }
+#endif
+ // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
+ // Allocate a row of ARGB.
+ {
+ align_buffer_64(row, clip_src_width * 4);
+
+ const int max_y = (src_height - 1) << 16;
+ if (y > max_y) {
+ y = max_y;
+ }
+ for (j = 0; j < dst_height; ++j) {
+ int yi = y >> 16;
+ const uint8_t* src = src_argb + yi * src_stride;
+ if (filtering == kFilterLinear) {
+ ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
+ } else {
+ int yf = (y >> 8) & 255;
+ InterpolateRow(row, src, src_stride, clip_src_width, yf);
+ ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
+ }
+ dst_argb += dst_stride;
+ y += dy;
+ if (y > max_y) {
+ y = max_y;
+ }
+ }
+ free_aligned_buffer_64(row);
+ }
+}
+
+// Scale ARGB up with bilinear interpolation.
+static void ScaleARGBBilinearUp(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy,
+ enum FilterMode filtering) {
+ int j;
+ void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ int dst_width, int x, int dx) =
+ filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
+ const int max_y = (src_height - 1) << 16;
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(dst_width, 4)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(dst_width, 8)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(dst_width, 4)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ InterpolateRow = InterpolateRow_Any_MSA;
+ if (IS_ALIGNED(dst_width, 8)) {
+ InterpolateRow = InterpolateRow_MSA;
+ }
+ }
+#endif
+ if (src_width >= 32768) {
+ ScaleARGBFilterCols =
+ filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
+ }
+#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
+ if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
+ }
+#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
+ if (filtering && TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
+ if (filtering && TestCpuFlag(kCpuHasMSA)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBCOLS_SSE2)
+ if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
+ ScaleARGBFilterCols = ScaleARGBCols_SSE2;
+ }
+#endif
+#if defined(HAS_SCALEARGBCOLS_NEON)
+ if (!filtering && TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBFilterCols = ScaleARGBCols_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBCOLS_MSA)
+ if (!filtering && TestCpuFlag(kCpuHasMSA)) {
+ ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBFilterCols = ScaleARGBCols_MSA;
+ }
+ }
+#endif
+ if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
+ ScaleARGBFilterCols = ScaleARGBColsUp2_C;
+#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
+ }
+#endif
+ }
+
+ if (y > max_y) {
+ y = max_y;
+ }
+
+ {
+ int yi = y >> 16;
+ const uint8_t* src = src_argb + yi * src_stride;
+
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (dst_width * 4 + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+
+ uint8_t* rowptr = row;
+ int rowstride = kRowSize;
+ int lasty = yi;
+
+ ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
+ if (src_height > 1) {
+ src += src_stride;
+ }
+ ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
+ src += src_stride;
+
+ for (j = 0; j < dst_height; ++j) {
+ yi = y >> 16;
+ if (yi != lasty) {
+ if (y > max_y) {
+ y = max_y;
+ yi = y >> 16;
+ src = src_argb + yi * src_stride;
+ }
+ if (yi != lasty) {
+ ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
+ rowptr += rowstride;
+ rowstride = -rowstride;
+ lasty = yi;
+ src += src_stride;
+ }
+ }
+ if (filtering == kFilterLinear) {
+ InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
+ } else {
+ int yf = (y >> 8) & 255;
+ InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
+ }
+ dst_argb += dst_stride;
+ y += dy;
+ }
+ free_aligned_buffer_64(row);
+ }
+}
+
+#ifdef YUVSCALEUP
+// Scale YUV to ARGB up with bilinear interpolation.
+static void ScaleYUVToARGBBilinearUp(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride_y,
+ int src_stride_u,
+ int src_stride_v,
+ int dst_stride_argb,
+ const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy,
+ enum FilterMode filtering) {
+ int j;
+ void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
+ I422ToARGBRow_C;
+#if defined(HAS_I422TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(src_width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+ if (IS_ALIGNED(src_width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToARGBRow = I422ToARGBRow_Any_NEON;
+ if (IS_ALIGNED(src_width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TOARGBROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToARGBRow = I422ToARGBRow_Any_MSA;
+ if (IS_ALIGNED(src_width, 8)) {
+ I422ToARGBRow = I422ToARGBRow_MSA;
+ }
+ }
+#endif
+
+ void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(dst_width, 4)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(dst_width, 8)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(dst_width, 4)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ InterpolateRow = InterpolateRow_Any_MSA;
+ if (IS_ALIGNED(dst_width, 8)) {
+ InterpolateRow = InterpolateRow_MSA;
+ }
+ }
+#endif
+
+ void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ int dst_width, int x, int dx) =
+ filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
+ if (src_width >= 32768) {
+ ScaleARGBFilterCols =
+ filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
+ }
+#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
+ if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
+ }
+#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
+ if (filtering && TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
+ if (filtering && TestCpuFlag(kCpuHasMSA)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBCOLS_SSE2)
+ if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
+ ScaleARGBFilterCols = ScaleARGBCols_SSE2;
+ }
+#endif
+#if defined(HAS_SCALEARGBCOLS_NEON)
+ if (!filtering && TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBFilterCols = ScaleARGBCols_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBCOLS_MSA)
+ if (!filtering && TestCpuFlag(kCpuHasMSA)) {
+ ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBFilterCols = ScaleARGBCols_MSA;
+ }
+ }
+#endif
+ if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
+ ScaleARGBFilterCols = ScaleARGBColsUp2_C;
+#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
+ }
+#endif
+ }
+
+ const int max_y = (src_height - 1) << 16;
+ if (y > max_y) {
+ y = max_y;
+ }
+ const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
+ int yi = y >> 16;
+ int uv_yi = yi >> kYShift;
+ const uint8_t* src_row_y = src_y + yi * src_stride_y;
+ const uint8_t* src_row_u = src_u + uv_yi * src_stride_u;
+ const uint8_t* src_row_v = src_v + uv_yi * src_stride_v;
+
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (dst_width * 4 + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+
+ // Allocate 1 row of ARGB for source conversion.
+ align_buffer_64(argb_row, src_width * 4);
+
+ uint8_t* rowptr = row;
+ int rowstride = kRowSize;
+ int lasty = yi;
+
+ // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
+ ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
+ if (src_height > 1) {
+ src_row_y += src_stride_y;
+ if (yi & 1) {
+ src_row_u += src_stride_u;
+ src_row_v += src_stride_v;
+ }
+ }
+ ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
+ if (src_height > 2) {
+ src_row_y += src_stride_y;
+ if (!(yi & 1)) {
+ src_row_u += src_stride_u;
+ src_row_v += src_stride_v;
+ }
+ }
+
+ for (j = 0; j < dst_height; ++j) {
+ yi = y >> 16;
+ if (yi != lasty) {
+ if (y > max_y) {
+ y = max_y;
+ yi = y >> 16;
+ uv_yi = yi >> kYShift;
+ src_row_y = src_y + yi * src_stride_y;
+ src_row_u = src_u + uv_yi * src_stride_u;
+ src_row_v = src_v + uv_yi * src_stride_v;
+ }
+ if (yi != lasty) {
+ // TODO(fbarchard): Convert the clipped region of row.
+ I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
+ ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
+ rowptr += rowstride;
+ rowstride = -rowstride;
+ lasty = yi;
+ src_row_y += src_stride_y;
+ if (yi & 1) {
+ src_row_u += src_stride_u;
+ src_row_v += src_stride_v;
+ }
+ }
+ }
+ if (filtering == kFilterLinear) {
+ InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
+ } else {
+ int yf = (y >> 8) & 255;
+ InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
+ }
+ dst_argb += dst_stride_argb;
+ y += dy;
+ }
+ free_aligned_buffer_64(row);
+ free_aligned_buffer_64(row_argb);
+}
+#endif
+
+// Scale ARGB to/from any dimensions, without interpolation.
+// Fixed point math is used for performance: The upper 16 bits
+// of x and dx is the integer part of the source position and
+// the lower 16 bits are the fixed decimal part.
+
+static void ScaleARGBSimple(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy) {
+ int j;
+ void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ int dst_width, int x, int dx) =
+ (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
+ (void)src_height;
+#if defined(HAS_SCALEARGBCOLS_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
+ ScaleARGBCols = ScaleARGBCols_SSE2;
+ }
+#endif
+#if defined(HAS_SCALEARGBCOLS_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleARGBCols = ScaleARGBCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBCols = ScaleARGBCols_NEON;
+ }
+ }
+#endif
+#if defined(HAS_SCALEARGBCOLS_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ScaleARGBCols = ScaleARGBCols_Any_MSA;
+ if (IS_ALIGNED(dst_width, 4)) {
+ ScaleARGBCols = ScaleARGBCols_MSA;
+ }
+ }
+#endif
+ if (src_width * 2 == dst_width && x < 0x8000) {
+ ScaleARGBCols = ScaleARGBColsUp2_C;
+#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
+ ScaleARGBCols = ScaleARGBColsUp2_SSE2;
+ }
+#endif
+ }
+
+ for (j = 0; j < dst_height; ++j) {
+ ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
+ dx);
+ dst_argb += dst_stride;
+ y += dy;
+ }
+}
+
+// ScaleARGB a ARGB.
+// This function in turn calls a scaling function
+// suitable for handling the desired resolutions.
+static void ScaleARGB(const uint8_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint8_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ int clip_x,
+ int clip_y,
+ int clip_width,
+ int clip_height,
+ enum FilterMode filtering) {
+ // Initial source x/y coordinate and step values as 16.16 fixed point.
+ int x = 0;
+ int y = 0;
+ int dx = 0;
+ int dy = 0;
+ // ARGB does not support box filter yet, but allow the user to pass it.
+ // Simplify filtering when possible.
+ filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
+ filtering);
+
+ // Negative src_height means invert the image.
+ if (src_height < 0) {
+ src_height = -src_height;
+ src = src + (src_height - 1) * src_stride;
+ src_stride = -src_stride;
+ }
+ ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
+ &dx, &dy);
+ src_width = Abs(src_width);
+ if (clip_x) {
+ int64_t clipf = (int64_t)(clip_x)*dx;
+ x += (clipf & 0xffff);
+ src += (clipf >> 16) * 4;
+ dst += clip_x * 4;
+ }
+ if (clip_y) {
+ int64_t clipf = (int64_t)(clip_y)*dy;
+ y += (clipf & 0xffff);
+ src += (clipf >> 16) * src_stride;
+ dst += clip_y * dst_stride;
+ }
+
+ // Special case for integer step values.
+ if (((dx | dy) & 0xffff) == 0) {
+ if (!dx || !dy) { // 1 pixel wide and/or tall.
+ filtering = kFilterNone;
+ } else {
+ // Optimized even scale down. ie 2, 4, 6, 8, 10x.
+ if (!(dx & 0x10000) && !(dy & 0x10000)) {
+ if (dx == 0x20000) {
+ // Optimized 1/2 downsample.
+ ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
+ src_stride, dst_stride, src, dst, x, dx, y, dy,
+ filtering);
+ return;
+ }
+ if (dx == 0x40000 && filtering == kFilterBox) {
+ // Optimized 1/4 box downsample.
+ ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
+ src_stride, dst_stride, src, dst, x, dx, y, dy);
+ return;
+ }
+ ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
+ src_stride, dst_stride, src, dst, x, dx, y, dy,
+ filtering);
+ return;
+ }
+ // Optimized odd scale down. ie 3, 5, 7, 9x.
+ if ((dx & 0x10000) && (dy & 0x10000)) {
+ filtering = kFilterNone;
+ if (dx == 0x10000 && dy == 0x10000) {
+ // Straight copy.
+ ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
+ dst, dst_stride, clip_width, clip_height);
+ return;
+ }
+ }
+ }
+ }
+ if (dx == 0x10000 && (x & 0xffff) == 0) {
+ // Arbitrary scale vertically, but unscaled vertically.
+ ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
+ dst_stride, src, dst, x, y, dy, 4, filtering);
+ return;
+ }
+ if (filtering && dy < 65536) {
+ ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
+ src_stride, dst_stride, src, dst, x, dx, y, dy,
+ filtering);
+ return;
+ }
+ if (filtering) {
+ ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
+ src_stride, dst_stride, src, dst, x, dx, y, dy,
+ filtering);
+ return;
+ }
+ ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
+ dst_stride, src, dst, x, dx, y, dy);
+}
+
+LIBYUV_API
+int ARGBScaleClip(const uint8_t* src_argb,
+ int src_stride_argb,
+ int src_width,
+ int src_height,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int dst_width,
+ int dst_height,
+ int clip_x,
+ int clip_y,
+ int clip_width,
+ int clip_height,
+ enum FilterMode filtering) {
+ if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
+ dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
+ clip_width > 32768 || clip_height > 32768 ||
+ (clip_x + clip_width) > dst_width ||
+ (clip_y + clip_height) > dst_height) {
+ return -1;
+ }
+ ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
+ dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
+ clip_height, filtering);
+ return 0;
+}
+
+// Scale an ARGB image.
+LIBYUV_API
+int ARGBScale(const uint8_t* src_argb,
+ int src_stride_argb,
+ int src_width,
+ int src_height,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering) {
+ if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
+ src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
+ return -1;
+ }
+ ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
+ dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
+ filtering);
+ return 0;
+}
+
+// Scale with YUV conversion to ARGB and clipping.
+LIBYUV_API
+int YUVToARGBScaleClip(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint32_t src_fourcc,
+ int src_width,
+ int src_height,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ uint32_t dst_fourcc,
+ int dst_width,
+ int dst_height,
+ int clip_x,
+ int clip_y,
+ int clip_width,
+ int clip_height,
+ enum FilterMode filtering) {
+ uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
+ int r;
+ (void)src_fourcc; // TODO(fbarchard): implement and/or assert.
+ (void)dst_fourcc;
+ I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
+ argb_buffer, src_width * 4, src_width, src_height);
+
+ r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
+ dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
+ clip_width, clip_height, filtering);
+ free(argb_buffer);
+ return r;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_common.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_common.cc
new file mode 100644
index 0000000000..b28d7da41f
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_common.cc
@@ -0,0 +1,1323 @@
+/*
+ * Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/scale.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h" // For CopyARGB
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+static __inline int Abs(int v) {
+ return v >= 0 ? v : -v;
+}
+
+// CPU agnostic row functions
+void ScaleRowDown2_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = src_ptr[1];
+ dst[1] = src_ptr[3];
+ dst += 2;
+ src_ptr += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = src_ptr[1];
+ }
+}
+
+void ScaleRowDown2_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = src_ptr[1];
+ dst[1] = src_ptr[3];
+ dst += 2;
+ src_ptr += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = src_ptr[1];
+ }
+}
+
+void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ const uint8_t* s = src_ptr;
+ int x;
+ (void)src_stride;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = (s[0] + s[1] + 1) >> 1;
+ dst[1] = (s[2] + s[3] + 1) >> 1;
+ dst += 2;
+ s += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = (s[0] + s[1] + 1) >> 1;
+ }
+}
+
+void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ const uint16_t* s = src_ptr;
+ int x;
+ (void)src_stride;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = (s[0] + s[1] + 1) >> 1;
+ dst[1] = (s[2] + s[3] + 1) >> 1;
+ dst += 2;
+ s += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = (s[0] + s[1] + 1) >> 1;
+ }
+}
+
+void ScaleRowDown2Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ int x;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
+ dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
+ dst += 2;
+ s += 4;
+ t += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
+ }
+}
+
+void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ int x;
+ dst_width -= 1;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
+ dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
+ dst += 2;
+ s += 4;
+ t += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
+ dst += 1;
+ s += 2;
+ t += 2;
+ }
+ dst[0] = (s[0] + t[0] + 1) >> 1;
+}
+
+void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
+ int x;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
+ dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
+ dst += 2;
+ s += 4;
+ t += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
+ }
+}
+
+void ScaleRowDown4_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = src_ptr[2];
+ dst[1] = src_ptr[6];
+ dst += 2;
+ src_ptr += 8;
+ }
+ if (dst_width & 1) {
+ dst[0] = src_ptr[2];
+ }
+}
+
+void ScaleRowDown4_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = src_ptr[2];
+ dst[1] = src_ptr[6];
+ dst += 2;
+ src_ptr += 8;
+ }
+ if (dst_width & 1) {
+ dst[0] = src_ptr[2];
+ }
+}
+
+void ScaleRowDown4Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ intptr_t stride = src_stride;
+ int x;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
+ src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
+ src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
+ src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
+ src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
+ src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
+ src_ptr[stride * 3 + 3] + 8) >>
+ 4;
+ dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
+ src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
+ src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
+ src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
+ src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
+ src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
+ src_ptr[stride * 3 + 7] + 8) >>
+ 4;
+ dst += 2;
+ src_ptr += 8;
+ }
+ if (dst_width & 1) {
+ dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
+ src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
+ src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
+ src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
+ src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
+ src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
+ src_ptr[stride * 3 + 3] + 8) >>
+ 4;
+ }
+}
+
+void ScaleRowDown4Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ intptr_t stride = src_stride;
+ int x;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
+ src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
+ src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
+ src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
+ src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
+ src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
+ src_ptr[stride * 3 + 3] + 8) >>
+ 4;
+ dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
+ src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
+ src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
+ src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
+ src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
+ src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
+ src_ptr[stride * 3 + 7] + 8) >>
+ 4;
+ dst += 2;
+ src_ptr += 8;
+ }
+ if (dst_width & 1) {
+ dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
+ src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
+ src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
+ src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
+ src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
+ src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
+ src_ptr[stride * 3 + 3] + 8) >>
+ 4;
+ }
+}
+
+void ScaleRowDown34_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ for (x = 0; x < dst_width; x += 3) {
+ dst[0] = src_ptr[0];
+ dst[1] = src_ptr[1];
+ dst[2] = src_ptr[3];
+ dst += 3;
+ src_ptr += 4;
+ }
+}
+
+void ScaleRowDown34_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ for (x = 0; x < dst_width; x += 3) {
+ dst[0] = src_ptr[0];
+ dst[1] = src_ptr[1];
+ dst[2] = src_ptr[3];
+ dst += 3;
+ src_ptr += 4;
+ }
+}
+
+// Filter rows 0 and 1 together, 3 : 1
+void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* d,
+ int dst_width) {
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ int x;
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ for (x = 0; x < dst_width; x += 3) {
+ uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+ uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+ uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+ uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+ uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+ uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+ d[0] = (a0 * 3 + b0 + 2) >> 2;
+ d[1] = (a1 * 3 + b1 + 2) >> 2;
+ d[2] = (a2 * 3 + b2 + 2) >> 2;
+ d += 3;
+ s += 4;
+ t += 4;
+ }
+}
+
+void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* d,
+ int dst_width) {
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
+ int x;
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ for (x = 0; x < dst_width; x += 3) {
+ uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+ uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+ uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+ uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+ uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+ uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+ d[0] = (a0 * 3 + b0 + 2) >> 2;
+ d[1] = (a1 * 3 + b1 + 2) >> 2;
+ d[2] = (a2 * 3 + b2 + 2) >> 2;
+ d += 3;
+ s += 4;
+ t += 4;
+ }
+}
+
+// Filter rows 1 and 2 together, 1 : 1
+void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* d,
+ int dst_width) {
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ int x;
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ for (x = 0; x < dst_width; x += 3) {
+ uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+ uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+ uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+ uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+ uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+ uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+ d[0] = (a0 + b0 + 1) >> 1;
+ d[1] = (a1 + b1 + 1) >> 1;
+ d[2] = (a2 + b2 + 1) >> 1;
+ d += 3;
+ s += 4;
+ t += 4;
+ }
+}
+
+void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* d,
+ int dst_width) {
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
+ int x;
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ for (x = 0; x < dst_width; x += 3) {
+ uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+ uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+ uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+ uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+ uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+ uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+ d[0] = (a0 + b0 + 1) >> 1;
+ d[1] = (a1 + b1 + 1) >> 1;
+ d[2] = (a2 + b2 + 1) >> 1;
+ d += 3;
+ s += 4;
+ t += 4;
+ }
+}
+
+// Scales a single row of pixels using point sampling.
+void ScaleCols_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ dst_ptr[0] = src_ptr[x >> 16];
+ x += dx;
+ dst_ptr[1] = src_ptr[x >> 16];
+ x += dx;
+ dst_ptr += 2;
+ }
+ if (dst_width & 1) {
+ dst_ptr[0] = src_ptr[x >> 16];
+ }
+}
+
+void ScaleCols_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ dst_ptr[0] = src_ptr[x >> 16];
+ x += dx;
+ dst_ptr[1] = src_ptr[x >> 16];
+ x += dx;
+ dst_ptr += 2;
+ }
+ if (dst_width & 1) {
+ dst_ptr[0] = src_ptr[x >> 16];
+ }
+}
+
+// Scales a single row of pixels up by 2x using point sampling.
+void ScaleColsUp2_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ int j;
+ (void)x;
+ (void)dx;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ dst_ptr[1] = dst_ptr[0] = src_ptr[0];
+ src_ptr += 1;
+ dst_ptr += 2;
+ }
+ if (dst_width & 1) {
+ dst_ptr[0] = src_ptr[0];
+ }
+}
+
+void ScaleColsUp2_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ int j;
+ (void)x;
+ (void)dx;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ dst_ptr[1] = dst_ptr[0] = src_ptr[0];
+ src_ptr += 1;
+ dst_ptr += 2;
+ }
+ if (dst_width & 1) {
+ dst_ptr[0] = src_ptr[0];
+ }
+}
+
+// (1-f)a + fb can be replaced with a + f(b-a)
+#if defined(__arm__) || defined(__aarch64__)
+#define BLENDER(a, b, f) \
+ (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
+#else
+// Intel uses 7 bit math with rounding.
+#define BLENDER(a, b, f) \
+ (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
+#endif
+
+void ScaleFilterCols_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ int xi = x >> 16;
+ int a = src_ptr[xi];
+ int b = src_ptr[xi + 1];
+ dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+ x += dx;
+ xi = x >> 16;
+ a = src_ptr[xi];
+ b = src_ptr[xi + 1];
+ dst_ptr[1] = BLENDER(a, b, x & 0xffff);
+ x += dx;
+ dst_ptr += 2;
+ }
+ if (dst_width & 1) {
+ int xi = x >> 16;
+ int a = src_ptr[xi];
+ int b = src_ptr[xi + 1];
+ dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+ }
+}
+
+void ScaleFilterCols64_C(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x32,
+ int dx) {
+ int64_t x = (int64_t)(x32);
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ int64_t xi = x >> 16;
+ int a = src_ptr[xi];
+ int b = src_ptr[xi + 1];
+ dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+ x += dx;
+ xi = x >> 16;
+ a = src_ptr[xi];
+ b = src_ptr[xi + 1];
+ dst_ptr[1] = BLENDER(a, b, x & 0xffff);
+ x += dx;
+ dst_ptr += 2;
+ }
+ if (dst_width & 1) {
+ int64_t xi = x >> 16;
+ int a = src_ptr[xi];
+ int b = src_ptr[xi + 1];
+ dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+ }
+}
+#undef BLENDER
+
+// Same as 8 bit arm blender but return is cast to uint16_t
+#define BLENDER(a, b, f) \
+ (uint16_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
+
+void ScaleFilterCols_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ int xi = x >> 16;
+ int a = src_ptr[xi];
+ int b = src_ptr[xi + 1];
+ dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+ x += dx;
+ xi = x >> 16;
+ a = src_ptr[xi];
+ b = src_ptr[xi + 1];
+ dst_ptr[1] = BLENDER(a, b, x & 0xffff);
+ x += dx;
+ dst_ptr += 2;
+ }
+ if (dst_width & 1) {
+ int xi = x >> 16;
+ int a = src_ptr[xi];
+ int b = src_ptr[xi + 1];
+ dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+ }
+}
+
+void ScaleFilterCols64_16_C(uint16_t* dst_ptr,
+ const uint16_t* src_ptr,
+ int dst_width,
+ int x32,
+ int dx) {
+ int64_t x = (int64_t)(x32);
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ int64_t xi = x >> 16;
+ int a = src_ptr[xi];
+ int b = src_ptr[xi + 1];
+ dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+ x += dx;
+ xi = x >> 16;
+ a = src_ptr[xi];
+ b = src_ptr[xi + 1];
+ dst_ptr[1] = BLENDER(a, b, x & 0xffff);
+ x += dx;
+ dst_ptr += 2;
+ }
+ if (dst_width & 1) {
+ int64_t xi = x >> 16;
+ int a = src_ptr[xi];
+ int b = src_ptr[xi + 1];
+ dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+ }
+}
+#undef BLENDER
+
+void ScaleRowDown38_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ assert(dst_width % 3 == 0);
+ for (x = 0; x < dst_width; x += 3) {
+ dst[0] = src_ptr[0];
+ dst[1] = src_ptr[3];
+ dst[2] = src_ptr[6];
+ dst += 3;
+ src_ptr += 8;
+ }
+}
+
+void ScaleRowDown38_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ assert(dst_width % 3 == 0);
+ for (x = 0; x < dst_width; x += 3) {
+ dst[0] = src_ptr[0];
+ dst[1] = src_ptr[3];
+ dst[2] = src_ptr[6];
+ dst += 3;
+ src_ptr += 8;
+ }
+}
+
+// 8x3 -> 3x1
+void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ intptr_t stride = src_stride;
+ int i;
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ for (i = 0; i < dst_width; i += 3) {
+ dst_ptr[0] =
+ (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
+ src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
+ src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
+ (65536 / 9) >>
+ 16;
+ dst_ptr[1] =
+ (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
+ src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
+ src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
+ (65536 / 9) >>
+ 16;
+ dst_ptr[2] =
+ (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
+ src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
+ (65536 / 6) >>
+ 16;
+ src_ptr += 8;
+ dst_ptr += 3;
+ }
+}
+
+void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ int dst_width) {
+ intptr_t stride = src_stride;
+ int i;
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ for (i = 0; i < dst_width; i += 3) {
+ dst_ptr[0] =
+ (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
+ src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
+ src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
+ (65536 / 9) >>
+ 16;
+ dst_ptr[1] =
+ (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
+ src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
+ src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
+ (65536 / 9) >>
+ 16;
+ dst_ptr[2] =
+ (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
+ src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
+ (65536 / 6) >>
+ 16;
+ src_ptr += 8;
+ dst_ptr += 3;
+ }
+}
+
+// 8x2 -> 3x1
+void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ intptr_t stride = src_stride;
+ int i;
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ for (i = 0; i < dst_width; i += 3) {
+ dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
+ src_ptr[stride + 1] + src_ptr[stride + 2]) *
+ (65536 / 6) >>
+ 16;
+ dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
+ src_ptr[stride + 4] + src_ptr[stride + 5]) *
+ (65536 / 6) >>
+ 16;
+ dst_ptr[2] =
+ (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
+ (65536 / 4) >>
+ 16;
+ src_ptr += 8;
+ dst_ptr += 3;
+ }
+}
+
+void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ int dst_width) {
+ intptr_t stride = src_stride;
+ int i;
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ for (i = 0; i < dst_width; i += 3) {
+ dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
+ src_ptr[stride + 1] + src_ptr[stride + 2]) *
+ (65536 / 6) >>
+ 16;
+ dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
+ src_ptr[stride + 4] + src_ptr[stride + 5]) *
+ (65536 / 6) >>
+ 16;
+ dst_ptr[2] =
+ (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
+ (65536 / 4) >>
+ 16;
+ src_ptr += 8;
+ dst_ptr += 3;
+ }
+}
+
+void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
+ int x;
+ assert(src_width > 0);
+ for (x = 0; x < src_width - 1; x += 2) {
+ dst_ptr[0] += src_ptr[0];
+ dst_ptr[1] += src_ptr[1];
+ src_ptr += 2;
+ dst_ptr += 2;
+ }
+ if (src_width & 1) {
+ dst_ptr[0] += src_ptr[0];
+ }
+}
+
+void ScaleAddRow_16_C(const uint16_t* src_ptr,
+ uint32_t* dst_ptr,
+ int src_width) {
+ int x;
+ assert(src_width > 0);
+ for (x = 0; x < src_width - 1; x += 2) {
+ dst_ptr[0] += src_ptr[0];
+ dst_ptr[1] += src_ptr[1];
+ src_ptr += 2;
+ dst_ptr += 2;
+ }
+ if (src_width & 1) {
+ dst_ptr[0] += src_ptr[0];
+ }
+}
+
+void ScaleARGBRowDown2_C(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
+ int x;
+ (void)src_stride;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = src[1];
+ dst[1] = src[3];
+ src += 4;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ dst[0] = src[1];
+ }
+}
+
+void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ for (x = 0; x < dst_width; ++x) {
+ dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
+ dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
+ dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
+ dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
+ src_argb += 8;
+ dst_argb += 4;
+ }
+}
+
+void ScaleARGBRowDown2Box_C(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ int x;
+ for (x = 0; x < dst_width; ++x) {
+ dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
+ src_argb[src_stride + 4] + 2) >>
+ 2;
+ dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
+ src_argb[src_stride + 5] + 2) >>
+ 2;
+ dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
+ src_argb[src_stride + 6] + 2) >>
+ 2;
+ dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
+ src_argb[src_stride + 7] + 2) >>
+ 2;
+ src_argb += 8;
+ dst_argb += 4;
+ }
+}
+
+void ScaleARGBRowDownEven_C(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
+ (void)src_stride;
+ int x;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = src[0];
+ dst[1] = src[src_stepx];
+ src += src_stepx * 2;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ dst[0] = src[0];
+ }
+}
+
+void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ int x;
+ for (x = 0; x < dst_width; ++x) {
+ dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
+ src_argb[src_stride + 4] + 2) >>
+ 2;
+ dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
+ src_argb[src_stride + 5] + 2) >>
+ 2;
+ dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
+ src_argb[src_stride + 6] + 2) >>
+ 2;
+ dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
+ src_argb[src_stride + 7] + 2) >>
+ 2;
+ src_argb += src_stepx * 4;
+ dst_argb += 4;
+ }
+}
+
+// Scales a single row of pixels using point sampling.
+void ScaleARGBCols_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ dst[0] = src[x >> 16];
+ x += dx;
+ dst[1] = src[x >> 16];
+ x += dx;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ dst[0] = src[x >> 16];
+ }
+}
+
+void ScaleARGBCols64_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x32,
+ int dx) {
+ int64_t x = (int64_t)(x32);
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ dst[0] = src[x >> 16];
+ x += dx;
+ dst[1] = src[x >> 16];
+ x += dx;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ dst[0] = src[x >> 16];
+ }
+}
+
+// Scales a single row of pixels up by 2x using point sampling.
+void ScaleARGBColsUp2_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
+ int j;
+ (void)x;
+ (void)dx;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ dst[1] = dst[0] = src[0];
+ src += 1;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ dst[0] = src[0];
+ }
+}
+
+// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
+// Mimics SSSE3 blender
+#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
+#define BLENDERC(a, b, f, s) \
+ (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
+#define BLENDER(a, b, f) \
+ BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
+ BLENDERC(a, b, f, 0)
+
+void ScaleARGBFilterCols_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ int xi = x >> 16;
+ int xf = (x >> 9) & 0x7f;
+ uint32_t a = src[xi];
+ uint32_t b = src[xi + 1];
+ dst[0] = BLENDER(a, b, xf);
+ x += dx;
+ xi = x >> 16;
+ xf = (x >> 9) & 0x7f;
+ a = src[xi];
+ b = src[xi + 1];
+ dst[1] = BLENDER(a, b, xf);
+ x += dx;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ int xi = x >> 16;
+ int xf = (x >> 9) & 0x7f;
+ uint32_t a = src[xi];
+ uint32_t b = src[xi + 1];
+ dst[0] = BLENDER(a, b, xf);
+ }
+}
+
+void ScaleARGBFilterCols64_C(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x32,
+ int dx) {
+ int64_t x = (int64_t)(x32);
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
+ int j;
+ for (j = 0; j < dst_width - 1; j += 2) {
+ int64_t xi = x >> 16;
+ int xf = (x >> 9) & 0x7f;
+ uint32_t a = src[xi];
+ uint32_t b = src[xi + 1];
+ dst[0] = BLENDER(a, b, xf);
+ x += dx;
+ xi = x >> 16;
+ xf = (x >> 9) & 0x7f;
+ a = src[xi];
+ b = src[xi + 1];
+ dst[1] = BLENDER(a, b, xf);
+ x += dx;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ int64_t xi = x >> 16;
+ int xf = (x >> 9) & 0x7f;
+ uint32_t a = src[xi];
+ uint32_t b = src[xi + 1];
+ dst[0] = BLENDER(a, b, xf);
+ }
+}
+#undef BLENDER1
+#undef BLENDERC
+#undef BLENDER
+
+// Scale plane vertically with bilinear interpolation.
+void ScalePlaneVertical(int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int y,
+ int dy,
+ int bpp,
+ enum FilterMode filtering) {
+ // TODO(fbarchard): Allow higher bpp.
+ int dst_width_bytes = dst_width * bpp;
+ void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_C;
+ const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
+ int j;
+ assert(bpp >= 1 && bpp <= 4);
+ assert(src_height != 0);
+ assert(dst_width > 0);
+ assert(dst_height > 0);
+ src_argb += (x >> 16) * bpp;
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_SSSE3;
+ if (IS_ALIGNED(dst_width_bytes, 16)) {
+ InterpolateRow = InterpolateRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_AVX2;
+ if (IS_ALIGNED(dst_width_bytes, 32)) {
+ InterpolateRow = InterpolateRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_NEON;
+ if (IS_ALIGNED(dst_width_bytes, 16)) {
+ InterpolateRow = InterpolateRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ InterpolateRow = InterpolateRow_Any_MSA;
+ if (IS_ALIGNED(dst_width_bytes, 32)) {
+ InterpolateRow = InterpolateRow_MSA;
+ }
+ }
+#endif
+ for (j = 0; j < dst_height; ++j) {
+ int yi;
+ int yf;
+ if (y > max_y) {
+ y = max_y;
+ }
+ yi = y >> 16;
+ yf = filtering ? ((y >> 8) & 255) : 0;
+ InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
+ dst_width_bytes, yf);
+ dst_argb += dst_stride;
+ y += dy;
+ }
+}
+void ScalePlaneVertical_16(int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_argb,
+ uint16_t* dst_argb,
+ int x,
+ int y,
+ int dy,
+ int wpp,
+ enum FilterMode filtering) {
+ // TODO(fbarchard): Allow higher wpp.
+ int dst_width_words = dst_width * wpp;
+ void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
+ ptrdiff_t src_stride, int dst_width,
+ int source_y_fraction) = InterpolateRow_16_C;
+ const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
+ int j;
+ assert(wpp >= 1 && wpp <= 2);
+ assert(src_height != 0);
+ assert(dst_width > 0);
+ assert(dst_height > 0);
+ src_argb += (x >> 16) * wpp;
+#if defined(HAS_INTERPOLATEROW_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ InterpolateRow = InterpolateRow_Any_16_SSE2;
+ if (IS_ALIGNED(dst_width_bytes, 16)) {
+ InterpolateRow = InterpolateRow_16_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ InterpolateRow = InterpolateRow_Any_16_SSSE3;
+ if (IS_ALIGNED(dst_width_bytes, 16)) {
+ InterpolateRow = InterpolateRow_16_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ InterpolateRow = InterpolateRow_Any_16_AVX2;
+ if (IS_ALIGNED(dst_width_bytes, 32)) {
+ InterpolateRow = InterpolateRow_16_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ InterpolateRow = InterpolateRow_Any_16_NEON;
+ if (IS_ALIGNED(dst_width_bytes, 16)) {
+ InterpolateRow = InterpolateRow_16_NEON;
+ }
+ }
+#endif
+ for (j = 0; j < dst_height; ++j) {
+ int yi;
+ int yf;
+ if (y > max_y) {
+ y = max_y;
+ }
+ yi = y >> 16;
+ yf = filtering ? ((y >> 8) & 255) : 0;
+ InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
+ dst_width_words, yf);
+ dst_argb += dst_stride;
+ y += dy;
+ }
+}
+
+// Simplify the filtering based on scale factors.
+enum FilterMode ScaleFilterReduce(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering) {
+ if (src_width < 0) {
+ src_width = -src_width;
+ }
+ if (src_height < 0) {
+ src_height = -src_height;
+ }
+ if (filtering == kFilterBox) {
+ // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
+ if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
+ filtering = kFilterBilinear;
+ }
+ }
+ if (filtering == kFilterBilinear) {
+ if (src_height == 1) {
+ filtering = kFilterLinear;
+ }
+ // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
+ if (dst_height == src_height || dst_height * 3 == src_height) {
+ filtering = kFilterLinear;
+ }
+ // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
+ // avoid reading 2 pixels horizontally that causes memory exception.
+ if (src_width == 1) {
+ filtering = kFilterNone;
+ }
+ }
+ if (filtering == kFilterLinear) {
+ if (src_width == 1) {
+ filtering = kFilterNone;
+ }
+ // TODO(fbarchard): Detect any odd scale factor and reduce to None.
+ if (dst_width == src_width || dst_width * 3 == src_width) {
+ filtering = kFilterNone;
+ }
+ }
+ return filtering;
+}
+
+// Divide num by div and return as 16.16 fixed point result.
+int FixedDiv_C(int num, int div) {
+ return (int)(((int64_t)(num) << 16) / div);
+}
+
+// Divide num by div and return as 16.16 fixed point result.
+int FixedDiv1_C(int num, int div) {
+ return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1));
+}
+
+#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
+
+// Compute slope values for stepping.
+void ScaleSlope(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering,
+ int* x,
+ int* y,
+ int* dx,
+ int* dy) {
+ assert(x != NULL);
+ assert(y != NULL);
+ assert(dx != NULL);
+ assert(dy != NULL);
+ assert(src_width != 0);
+ assert(src_height != 0);
+ assert(dst_width > 0);
+ assert(dst_height > 0);
+ // Check for 1 pixel and avoid FixedDiv overflow.
+ if (dst_width == 1 && src_width >= 32768) {
+ dst_width = src_width;
+ }
+ if (dst_height == 1 && src_height >= 32768) {
+ dst_height = src_height;
+ }
+ if (filtering == kFilterBox) {
+ // Scale step for point sampling duplicates all pixels equally.
+ *dx = FixedDiv(Abs(src_width), dst_width);
+ *dy = FixedDiv(src_height, dst_height);
+ *x = 0;
+ *y = 0;
+ } else if (filtering == kFilterBilinear) {
+ // Scale step for bilinear sampling renders last pixel once for upsample.
+ if (dst_width <= Abs(src_width)) {
+ *dx = FixedDiv(Abs(src_width), dst_width);
+ *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
+ } else if (dst_width > 1) {
+ *dx = FixedDiv1(Abs(src_width), dst_width);
+ *x = 0;
+ }
+ if (dst_height <= src_height) {
+ *dy = FixedDiv(src_height, dst_height);
+ *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
+ } else if (dst_height > 1) {
+ *dy = FixedDiv1(src_height, dst_height);
+ *y = 0;
+ }
+ } else if (filtering == kFilterLinear) {
+ // Scale step for bilinear sampling renders last pixel once for upsample.
+ if (dst_width <= Abs(src_width)) {
+ *dx = FixedDiv(Abs(src_width), dst_width);
+ *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
+ } else if (dst_width > 1) {
+ *dx = FixedDiv1(Abs(src_width), dst_width);
+ *x = 0;
+ }
+ *dy = FixedDiv(src_height, dst_height);
+ *y = *dy >> 1;
+ } else {
+ // Scale step for point sampling duplicates all pixels equally.
+ *dx = FixedDiv(Abs(src_width), dst_width);
+ *dy = FixedDiv(src_height, dst_height);
+ *x = CENTERSTART(*dx, 0);
+ *y = CENTERSTART(*dy, 0);
+ }
+ // Negative src_width means horizontally mirror.
+ if (src_width < 0) {
+ *x += (dst_width - 1) * *dx;
+ *dx = -*dx;
+ // src_width = -src_width; // Caller must do this.
+ }
+}
+#undef CENTERSTART
+
+// Read 8x2 upsample with filtering and write 16x1.
+// actually reads an extra pixel, so 9x2.
+void ScaleRowUp2_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ const uint16_t* src2 = src_ptr + src_stride;
+
+ int x;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ uint16_t p0 = src_ptr[0];
+ uint16_t p1 = src_ptr[1];
+ uint16_t p2 = src2[0];
+ uint16_t p3 = src2[1];
+ dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
+ dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4;
+ ++src_ptr;
+ ++src2;
+ dst += 2;
+ }
+ if (dst_width & 1) {
+ uint16_t p0 = src_ptr[0];
+ uint16_t p1 = src_ptr[1];
+ uint16_t p2 = src2[0];
+ uint16_t p3 = src2[1];
+ dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_gcc.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_gcc.cc
new file mode 100644
index 0000000000..312236d2df
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_gcc.cc
@@ -0,0 +1,1374 @@
+/*
+ * Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC x86 and x64.
+#if !defined(LIBYUV_DISABLE_X86) && \
+ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
+
+// Offsets for source bytes 0 to 9
+static const uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9,
+ 128, 128, 128, 128, 128, 128, 128, 128};
+
+// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
+static const uvec8 kShuf1 = {3, 4, 5, 7, 8, 9, 11, 12,
+ 128, 128, 128, 128, 128, 128, 128, 128};
+
+// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
+static const uvec8 kShuf2 = {5, 7, 8, 9, 11, 12, 13, 15,
+ 128, 128, 128, 128, 128, 128, 128, 128};
+
+// Offsets for source bytes 0 to 10
+static const uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10};
+
+// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
+static const uvec8 kShuf11 = {2, 3, 4, 5, 5, 6, 6, 7,
+ 8, 9, 9, 10, 10, 11, 12, 13};
+
+// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
+static const uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10,
+ 10, 11, 12, 13, 13, 14, 14, 15};
+
+// Coefficients for source bytes 0 to 10
+static const uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2};
+
+// Coefficients for source bytes 10 to 21
+static const uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1};
+
+// Coefficients for source bytes 21 to 31
+static const uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3};
+
+// Coefficients for source bytes 21 to 31
+static const vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2};
+
+static const uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128};
+
+static const uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3,
+ 6, 8, 11, 14, 128, 128, 128, 128};
+
+// Arrange words 0,3,6 into 0,1,2
+static const uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128};
+
+// Arrange words 0,3,6 into 3,4,5
+static const uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1,
+ 6, 7, 12, 13, 128, 128, 128, 128};
+
+// Scaling values for boxes of 3x3 and 2x3
+static const uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9,
+ 65536 / 9, 65536 / 6, 0, 0};
+
+// Arrange first value for pixels 0,1,2,3,4,5
+static const uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128,
+ 11, 128, 14, 128, 128, 128, 128, 128};
+
+// Arrange second value for pixels 0,1,2,3,4,5
+static const uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128,
+ 12, 128, 15, 128, 128, 128, 128, 128};
+
+// Arrange third value for pixels 0,1,2,3,4,5
+static const uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128,
+ 13, 128, 128, 128, 128, 128, 128, 128};
+
+// Scaling values for boxes of 3x2 and 2x2
+static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3,
+ 65536 / 3, 65536 / 2, 0, 0};
+
+// GCC versions of row functions are verbatim conversions from Visual C.
+// Generated using gcc disassembly on Visual C object file:
+// objdump -D yuvscaler.obj >yuvscaler.txt
+
+void ScaleRowDown2_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
+}
+
+void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrlw $0xf,%%xmm4 \n"
+ "packuswb %%xmm4,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pavgw %%xmm5,%%xmm0 \n"
+ "pavgw %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm4", "xmm5");
+}
+
+void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrlw $0xf,%%xmm4 \n"
+ "packuswb %%xmm4,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ "psrlw $0x1,%%xmm0 \n"
+ "psrlw $0x1,%%xmm1 \n"
+ "pavgw %%xmm5,%%xmm0 \n"
+ "pavgw %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+
+#ifdef HAS_SCALEROWDOWN2_AVX2
+void ScaleRowDown2_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
+}
+
+void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
+ "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm4", "xmm5");
+}
+
+void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
+ "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x00(%0,%3,1),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%3,1),%%ymm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpsrlw $0x1,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x1,%%ymm1,%%ymm1 \n"
+ "vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+#endif // HAS_SCALEROWDOWN2_AVX2
+
+void ScaleRowDown4_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "psrld $0x18,%%xmm5 \n"
+ "pslld $0x10,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pand %%xmm5,%%xmm0 \n"
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm5");
+}
+
+void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ intptr_t stridex3;
+ asm volatile(
+ "pcmpeqb %%xmm4,%%xmm4 \n"
+ "psrlw $0xf,%%xmm4 \n"
+ "movdqa %%xmm4,%%xmm5 \n"
+ "packuswb %%xmm4,%%xmm4 \n"
+ "psllw $0x3,%%xmm5 \n"
+ "lea 0x00(%4,%4,2),%3 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ "movdqu 0x00(%0,%4,2),%%xmm2 \n"
+ "movdqu 0x10(%0,%4,2),%%xmm3 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm4,%%xmm3 \n"
+ "paddw %%xmm2,%%xmm0 \n"
+ "paddw %%xmm3,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "psrlw $0x4,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "=&r"(stridex3) // %3
+ : "r"((intptr_t)(src_stride)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+
+#ifdef HAS_SCALEROWDOWN4_AVX2
+void ScaleRowDown4_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
+ "vpsrld $0x18,%%ymm5,%%ymm5 \n"
+ "vpslld $0x10,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm5");
+}
+
+void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
+ "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
+ "vpsllw $0x3,%%ymm4,%%ymm5 \n"
+ "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x00(%0,%3,1),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%3,1),%%ymm3 \n"
+ "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vmovdqu 0x00(%0,%3,2),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%3,2),%%ymm3 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vmovdqu 0x00(%0,%4,1),%%ymm2 \n"
+ "vmovdqu 0x20(%0,%4,1),%%ymm3 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
+ "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
+ "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vphaddw %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x4,%%ymm0,%%ymm0 \n"
+ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "r"((intptr_t)(src_stride * 3)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+#endif // HAS_SCALEROWDOWN4_AVX2
+
+void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "movdqa %0,%%xmm3 \n"
+ "movdqa %1,%%xmm4 \n"
+ "movdqa %2,%%xmm5 \n"
+ :
+ : "m"(kShuf0), // %0
+ "m"(kShuf1), // %1
+ "m"(kShuf2) // %2
+ );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm2 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqa %%xmm2,%%xmm1 \n"
+ "palignr $0x8,%%xmm0,%%xmm1 \n"
+ "pshufb %%xmm3,%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm1 \n"
+ "pshufb %%xmm5,%%xmm2 \n"
+ "movq %%xmm0,(%1) \n"
+ "movq %%xmm1,0x8(%1) \n"
+ "movq %%xmm2,0x10(%1) \n"
+ "lea 0x18(%1),%1 \n"
+ "sub $0x18,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+}
+
+void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "movdqa %0,%%xmm2 \n" // kShuf01
+ "movdqa %1,%%xmm3 \n" // kShuf11
+ "movdqa %2,%%xmm4 \n" // kShuf21
+ :
+ : "m"(kShuf01), // %0
+ "m"(kShuf11), // %1
+ "m"(kShuf21) // %2
+ );
+ asm volatile(
+ "movdqa %0,%%xmm5 \n" // kMadd01
+ "movdqa %1,%%xmm0 \n" // kMadd11
+ "movdqa %2,%%xmm1 \n" // kRound34
+ :
+ : "m"(kMadd01), // %0
+ "m"(kMadd11), // %1
+ "m"(kRound34) // %2
+ );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm5,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,(%1) \n"
+ "movdqu 0x8(%0),%%xmm6 \n"
+ "movdqu 0x8(%0,%3,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm3,%%xmm6 \n"
+ "pmaddubsw %%xmm0,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,0x8(%1) \n"
+ "movdqu 0x10(%0),%%xmm6 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm7 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm4,%%xmm6 \n"
+ "pmaddubsw %4,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,0x10(%1) \n"
+ "lea 0x18(%1),%1 \n"
+ "sub $0x18,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "m"(kMadd21) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+
+void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "movdqa %0,%%xmm2 \n" // kShuf01
+ "movdqa %1,%%xmm3 \n" // kShuf11
+ "movdqa %2,%%xmm4 \n" // kShuf21
+ :
+ : "m"(kShuf01), // %0
+ "m"(kShuf11), // %1
+ "m"(kShuf21) // %2
+ );
+ asm volatile(
+ "movdqa %0,%%xmm5 \n" // kMadd01
+ "movdqa %1,%%xmm0 \n" // kMadd11
+ "movdqa %2,%%xmm1 \n" // kRound34
+ :
+ : "m"(kMadd01), // %0
+ "m"(kMadd11), // %1
+ "m"(kRound34) // %2
+ );
+
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm7 \n"
+ "pavgb %%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm5,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,(%1) \n"
+ "movdqu 0x8(%0),%%xmm6 \n"
+ "movdqu 0x8(%0,%3,1),%%xmm7 \n"
+ "pavgb %%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm3,%%xmm6 \n"
+ "pmaddubsw %%xmm0,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,0x8(%1) \n"
+ "movdqu 0x10(%0),%%xmm6 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm7 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+ "pshufb %%xmm4,%%xmm6 \n"
+ "pmaddubsw %4,%%xmm6 \n"
+ "paddsw %%xmm1,%%xmm6 \n"
+ "psrlw $0x2,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movq %%xmm6,0x10(%1) \n"
+ "lea 0x18(%1),%1 \n"
+ "sub $0x18,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)), // %3
+ "m"(kMadd21) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+
+void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "paddusb %%xmm1,%%xmm0 \n"
+ "movq %%xmm0,(%1) \n"
+ "movhlps %%xmm0,%%xmm1 \n"
+ "movd %%xmm1,0x8(%1) \n"
+ "lea 0xc(%1),%1 \n"
+ "sub $0xc,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "m"(kShuf38a), // %3
+ "m"(kShuf38b) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5");
+}
+
+void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "movdqa %0,%%xmm2 \n"
+ "movdqa %1,%%xmm3 \n"
+ "movdqa %2,%%xmm4 \n"
+ "movdqa %3,%%xmm5 \n"
+ :
+ : "m"(kShufAb0), // %0
+ "m"(kShufAb1), // %1
+ "m"(kShufAb2), // %2
+ "m"(kScaleAb2) // %3
+ );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "pavgb %%xmm1,%%xmm0 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "pshufb %%xmm2,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm6 \n"
+ "pshufb %%xmm3,%%xmm6 \n"
+ "paddusw %%xmm6,%%xmm1 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "paddusw %%xmm0,%%xmm1 \n"
+ "pmulhuw %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movd %%xmm1,(%1) \n"
+ "psrlq $0x10,%%xmm1 \n"
+ "movd %%xmm1,0x2(%1) \n"
+ "lea 0x6(%1),%1 \n"
+ "sub $0x6,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+
+void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "movdqa %0,%%xmm2 \n"
+ "movdqa %1,%%xmm3 \n"
+ "movdqa %2,%%xmm4 \n"
+ "pxor %%xmm5,%%xmm5 \n"
+ :
+ : "m"(kShufAc), // %0
+ "m"(kShufAc3), // %1
+ "m"(kScaleAc33) // %2
+ );
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm6 \n"
+ "movhlps %%xmm0,%%xmm1 \n"
+ "movhlps %%xmm6,%%xmm7 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "punpcklbw %%xmm5,%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm6 \n"
+ "punpcklbw %%xmm5,%%xmm7 \n"
+ "paddusw %%xmm6,%%xmm0 \n"
+ "paddusw %%xmm7,%%xmm1 \n"
+ "movdqu 0x00(%0,%3,2),%%xmm6 \n"
+ "lea 0x10(%0),%0 \n"
+ "movhlps %%xmm6,%%xmm7 \n"
+ "punpcklbw %%xmm5,%%xmm6 \n"
+ "punpcklbw %%xmm5,%%xmm7 \n"
+ "paddusw %%xmm6,%%xmm0 \n"
+ "paddusw %%xmm7,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm6 \n"
+ "psrldq $0x2,%%xmm0 \n"
+ "paddusw %%xmm0,%%xmm6 \n"
+ "psrldq $0x2,%%xmm0 \n"
+ "paddusw %%xmm0,%%xmm6 \n"
+ "pshufb %%xmm2,%%xmm6 \n"
+ "movdqa %%xmm1,%%xmm7 \n"
+ "psrldq $0x2,%%xmm1 \n"
+ "paddusw %%xmm1,%%xmm7 \n"
+ "psrldq $0x2,%%xmm1 \n"
+ "paddusw %%xmm1,%%xmm7 \n"
+ "pshufb %%xmm3,%%xmm7 \n"
+ "paddusw %%xmm7,%%xmm6 \n"
+ "pmulhuw %%xmm4,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "movd %%xmm6,(%1) \n"
+ "psrlq $0x10,%%xmm6 \n"
+ "movd %%xmm6,0x2(%1) \n"
+ "lea 0x6(%1),%1 \n"
+ "sub $0x6,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+
+// Reads 16xN bytes and produces 16 shorts at a time.
+void ScaleAddRow_SSE2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width) {
+ asm volatile(
+
+ "pxor %%xmm5,%%xmm5 \n"
+
+ // 16 pixel loop.
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm3 \n"
+ "lea 0x10(%0),%0 \n" // src_ptr += 16
+ "movdqu (%1),%%xmm0 \n"
+ "movdqu 0x10(%1),%%xmm1 \n"
+ "movdqa %%xmm3,%%xmm2 \n"
+ "punpcklbw %%xmm5,%%xmm2 \n"
+ "punpckhbw %%xmm5,%%xmm3 \n"
+ "paddusw %%xmm2,%%xmm0 \n"
+ "paddusw %%xmm3,%%xmm1 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(src_width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+
+#ifdef HAS_SCALEADDROW_AVX2
+// Reads 32 bytes and accumulates to 32 shorts at a time.
+void ScaleAddRow_AVX2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width) {
+ asm volatile(
+
+ "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm3 \n"
+ "lea 0x20(%0),%0 \n" // src_ptr += 32
+ "vpermq $0xd8,%%ymm3,%%ymm3 \n"
+ "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
+ "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
+ "vpaddusw (%1),%%ymm2,%%ymm0 \n"
+ "vpaddusw 0x20(%1),%%ymm3,%%ymm1 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "vmovdqu %%ymm1,0x20(%1) \n"
+ "lea 0x40(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(src_width) // %2
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
+}
+#endif // HAS_SCALEADDROW_AVX2
+
+// Constant for making pixels signed to avoid pmaddubsw
+// saturation.
+static const uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
+
+// Constant for making pixels unsigned and adding .5 for rounding.
+static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040,
+ 0x4040, 0x4040, 0x4040, 0x4040};
+
+// Bilinear column filtering. SSSE3 version.
+void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ intptr_t x0, x1, temp_pixel;
+ asm volatile(
+ "movd %6,%%xmm2 \n"
+ "movd %7,%%xmm3 \n"
+ "movl $0x04040000,%k2 \n"
+ "movd %k2,%%xmm5 \n"
+ "pcmpeqb %%xmm6,%%xmm6 \n"
+ "psrlw $0x9,%%xmm6 \n" // 0x007f007f
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "psrlw $15,%%xmm7 \n" // 0x00010001
+
+ "pextrw $0x1,%%xmm2,%k3 \n"
+ "subl $0x2,%5 \n"
+ "jl 29f \n"
+ "movdqa %%xmm2,%%xmm0 \n"
+ "paddd %%xmm3,%%xmm0 \n"
+ "punpckldq %%xmm0,%%xmm2 \n"
+ "punpckldq %%xmm3,%%xmm3 \n"
+ "paddd %%xmm3,%%xmm3 \n"
+ "pextrw $0x3,%%xmm2,%k4 \n"
+
+ LABELALIGN
+ "2: \n"
+ "movdqa %%xmm2,%%xmm1 \n"
+ "paddd %%xmm3,%%xmm2 \n"
+ "movzwl 0x00(%1,%3,1),%k2 \n"
+ "movd %k2,%%xmm0 \n"
+ "psrlw $0x9,%%xmm1 \n"
+ "movzwl 0x00(%1,%4,1),%k2 \n"
+ "movd %k2,%%xmm4 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "punpcklwd %%xmm4,%%xmm0 \n"
+ "psubb %8,%%xmm0 \n" // make pixels signed.
+ "pxor %%xmm6,%%xmm1 \n" // 128 - f = (f ^ 127 ) +
+ // 1
+ "paddusb %%xmm7,%%xmm1 \n"
+ "pmaddubsw %%xmm0,%%xmm1 \n"
+ "pextrw $0x1,%%xmm2,%k3 \n"
+ "pextrw $0x3,%%xmm2,%k4 \n"
+ "paddw %9,%%xmm1 \n" // make pixels unsigned.
+ "psrlw $0x7,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm1 \n"
+ "movd %%xmm1,%k2 \n"
+ "mov %w2,(%0) \n"
+ "lea 0x2(%0),%0 \n"
+ "subl $0x2,%5 \n"
+ "jge 2b \n"
+
+ LABELALIGN
+ "29: \n"
+ "addl $0x1,%5 \n"
+ "jl 99f \n"
+ "movzwl 0x00(%1,%3,1),%k2 \n"
+ "movd %k2,%%xmm0 \n"
+ "psrlw $0x9,%%xmm2 \n"
+ "pshufb %%xmm5,%%xmm2 \n"
+ "psubb %8,%%xmm0 \n" // make pixels signed.
+ "pxor %%xmm6,%%xmm2 \n"
+ "paddusb %%xmm7,%%xmm2 \n"
+ "pmaddubsw %%xmm0,%%xmm2 \n"
+ "paddw %9,%%xmm2 \n" // make pixels unsigned.
+ "psrlw $0x7,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm2 \n"
+ "movd %%xmm2,%k2 \n"
+ "mov %b2,(%0) \n"
+ "99: \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "=&a"(temp_pixel), // %2
+ "=&r"(x0), // %3
+ "=&r"(x1), // %4
+#if defined(__x86_64__)
+ "+rm"(dst_width) // %5
+#else
+ "+m"(dst_width) // %5
+#endif
+ : "rm"(x), // %6
+ "rm"(dx), // %7
+#if defined(__x86_64__)
+ "x"(kFsub80), // %8
+ "x"(kFadd40) // %9
+#else
+ "m"(kFsub80), // %8
+ "m"(kFadd40) // %9
+#endif
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+
+// Reads 4 pixels, duplicates them and writes 8 pixels.
+// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
+void ScaleColsUp2_SSE2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ (void)x;
+ (void)dx;
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%1),%%xmm0 \n"
+ "lea 0x10(%1),%1 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm0,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm1 \n"
+ "movdqu %%xmm0,(%0) \n"
+ "movdqu %%xmm1,0x10(%0) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
+}
+
+void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
+}
+
+void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm2 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
+}
+
+void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "pavgb %%xmm3,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm2 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ : "r"((intptr_t)(src_stride)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
+}
+
+// Reads 4 pixels at a time.
+// Alignment requirement: dst_argb 16 byte aligned.
+void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
+ intptr_t src_stepx_x12;
+ (void)src_stride;
+ asm volatile(
+ "lea 0x00(,%1,4),%1 \n"
+ "lea 0x00(%1,%1,2),%4 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movd (%0),%%xmm0 \n"
+ "movd 0x00(%0,%1,1),%%xmm1 \n"
+ "punpckldq %%xmm1,%%xmm0 \n"
+ "movd 0x00(%0,%1,2),%%xmm2 \n"
+ "movd 0x00(%0,%4,1),%%xmm3 \n"
+ "lea 0x00(%0,%1,4),%0 \n"
+ "punpckldq %%xmm3,%%xmm2 \n"
+ "punpcklqdq %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_stepx_x4), // %1
+ "+r"(dst_argb), // %2
+ "+r"(dst_width), // %3
+ "=&r"(src_stepx_x12) // %4
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3");
+}
+
+// Blends four 2x2 to 4x1.
+// Alignment requirement: dst_argb 16 byte aligned.
+void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
+ intptr_t src_stepx_x12;
+ intptr_t row1 = (intptr_t)(src_stride);
+ asm volatile(
+ "lea 0x00(,%1,4),%1 \n"
+ "lea 0x00(%1,%1,2),%4 \n"
+ "lea 0x00(%0,%5,1),%5 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movq (%0),%%xmm0 \n"
+ "movhps 0x00(%0,%1,1),%%xmm0 \n"
+ "movq 0x00(%0,%1,2),%%xmm1 \n"
+ "movhps 0x00(%0,%4,1),%%xmm1 \n"
+ "lea 0x00(%0,%1,4),%0 \n"
+ "movq (%5),%%xmm2 \n"
+ "movhps 0x00(%5,%1,1),%%xmm2 \n"
+ "movq 0x00(%5,%1,2),%%xmm3 \n"
+ "movhps 0x00(%5,%4,1),%%xmm3 \n"
+ "lea 0x00(%5,%1,4),%5 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "pavgb %%xmm3,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm2 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_stepx_x4), // %1
+ "+r"(dst_argb), // %2
+ "+rm"(dst_width), // %3
+ "=&r"(src_stepx_x12), // %4
+ "+r"(row1) // %5
+ ::"memory",
+ "cc", "xmm0", "xmm1", "xmm2", "xmm3");
+}
+
+void ScaleARGBCols_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ intptr_t x0, x1;
+ asm volatile(
+ "movd %5,%%xmm2 \n"
+ "movd %6,%%xmm3 \n"
+ "pshufd $0x0,%%xmm2,%%xmm2 \n"
+ "pshufd $0x11,%%xmm3,%%xmm0 \n"
+ "paddd %%xmm0,%%xmm2 \n"
+ "paddd %%xmm3,%%xmm3 \n"
+ "pshufd $0x5,%%xmm3,%%xmm0 \n"
+ "paddd %%xmm0,%%xmm2 \n"
+ "paddd %%xmm3,%%xmm3 \n"
+ "pshufd $0x0,%%xmm3,%%xmm3 \n"
+ "pextrw $0x1,%%xmm2,%k0 \n"
+ "pextrw $0x3,%%xmm2,%k1 \n"
+ "cmp $0x0,%4 \n"
+ "jl 99f \n"
+ "sub $0x4,%4 \n"
+ "jl 49f \n"
+
+ LABELALIGN
+ "40: \n"
+ "movd 0x00(%3,%0,4),%%xmm0 \n"
+ "movd 0x00(%3,%1,4),%%xmm1 \n"
+ "pextrw $0x5,%%xmm2,%k0 \n"
+ "pextrw $0x7,%%xmm2,%k1 \n"
+ "paddd %%xmm3,%%xmm2 \n"
+ "punpckldq %%xmm1,%%xmm0 \n"
+ "movd 0x00(%3,%0,4),%%xmm1 \n"
+ "movd 0x00(%3,%1,4),%%xmm4 \n"
+ "pextrw $0x1,%%xmm2,%k0 \n"
+ "pextrw $0x3,%%xmm2,%k1 \n"
+ "punpckldq %%xmm4,%%xmm1 \n"
+ "punpcklqdq %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "lea 0x10(%2),%2 \n"
+ "sub $0x4,%4 \n"
+ "jge 40b \n"
+
+ "49: \n"
+ "test $0x2,%4 \n"
+ "je 29f \n"
+ "movd 0x00(%3,%0,4),%%xmm0 \n"
+ "movd 0x00(%3,%1,4),%%xmm1 \n"
+ "pextrw $0x5,%%xmm2,%k0 \n"
+ "punpckldq %%xmm1,%%xmm0 \n"
+ "movq %%xmm0,(%2) \n"
+ "lea 0x8(%2),%2 \n"
+ "29: \n"
+ "test $0x1,%4 \n"
+ "je 99f \n"
+ "movd 0x00(%3,%0,4),%%xmm0 \n"
+ "movd %%xmm0,(%2) \n"
+ "99: \n"
+ : "=&a"(x0), // %0
+ "=&d"(x1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(src_argb), // %3
+ "+r"(dst_width) // %4
+ : "rm"(x), // %5
+ "rm"(dx) // %6
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
+}
+
+// Reads 4 pixels, duplicates them and writes 8 pixels.
+// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
+void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ (void)x;
+ (void)dx;
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%1),%%xmm0 \n"
+ "lea 0x10(%1),%1 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpckldq %%xmm0,%%xmm0 \n"
+ "punpckhdq %%xmm1,%%xmm1 \n"
+ "movdqu %%xmm0,(%0) \n"
+ "movdqu %%xmm1,0x10(%0) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+r"(dst_width) // %2
+ ::"memory",
+ "cc", "xmm0", "xmm1");
+}
+
+// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
+static const uvec8 kShuffleColARGB = {
+ 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel
+ 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel
+};
+
+// Shuffle table for duplicating 2 fractions into 8 bytes each
+static const uvec8 kShuffleFractions = {
+ 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
+};
+
+// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
+void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ intptr_t x0, x1;
+ asm volatile(
+ "movdqa %0,%%xmm4 \n"
+ "movdqa %1,%%xmm5 \n"
+ :
+ : "m"(kShuffleColARGB), // %0
+ "m"(kShuffleFractions) // %1
+ );
+
+ asm volatile(
+ "movd %5,%%xmm2 \n"
+ "movd %6,%%xmm3 \n"
+ "pcmpeqb %%xmm6,%%xmm6 \n"
+ "psrlw $0x9,%%xmm6 \n"
+ "pextrw $0x1,%%xmm2,%k3 \n"
+ "sub $0x2,%2 \n"
+ "jl 29f \n"
+ "movdqa %%xmm2,%%xmm0 \n"
+ "paddd %%xmm3,%%xmm0 \n"
+ "punpckldq %%xmm0,%%xmm2 \n"
+ "punpckldq %%xmm3,%%xmm3 \n"
+ "paddd %%xmm3,%%xmm3 \n"
+ "pextrw $0x3,%%xmm2,%k4 \n"
+
+ LABELALIGN
+ "2: \n"
+ "movdqa %%xmm2,%%xmm1 \n"
+ "paddd %%xmm3,%%xmm2 \n"
+ "movq 0x00(%1,%3,4),%%xmm0 \n"
+ "psrlw $0x9,%%xmm1 \n"
+ "movhps 0x00(%1,%4,4),%%xmm0 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "pxor %%xmm6,%%xmm1 \n"
+ "pmaddubsw %%xmm1,%%xmm0 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "pextrw $0x1,%%xmm2,%k3 \n"
+ "pextrw $0x3,%%xmm2,%k4 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movq %%xmm0,(%0) \n"
+ "lea 0x8(%0),%0 \n"
+ "sub $0x2,%2 \n"
+ "jge 2b \n"
+
+ LABELALIGN
+ "29: \n"
+ "add $0x1,%2 \n"
+ "jl 99f \n"
+ "psrlw $0x9,%%xmm2 \n"
+ "movq 0x00(%1,%3,4),%%xmm0 \n"
+ "pshufb %%xmm5,%%xmm2 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "pxor %%xmm6,%%xmm2 \n"
+ "pmaddubsw %%xmm2,%%xmm0 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movd %%xmm0,(%0) \n"
+
+ LABELALIGN "99: \n" // clang-format error.
+
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+rm"(dst_width), // %2
+ "=&r"(x0), // %3
+ "=&r"(x1) // %4
+ : "rm"(x), // %5
+ "rm"(dx) // %6
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+
+// Divide num by div and return as 16.16 fixed point result.
+int FixedDiv_X86(int num, int div) {
+ asm volatile(
+ "cdq \n"
+ "shld $0x10,%%eax,%%edx \n"
+ "shl $0x10,%%eax \n"
+ "idiv %1 \n"
+ "mov %0, %%eax \n"
+ : "+a"(num) // %0
+ : "c"(div) // %1
+ : "memory", "cc", "edx");
+ return num;
+}
+
+// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
+int FixedDiv1_X86(int num, int div) {
+ asm volatile(
+ "cdq \n"
+ "shld $0x10,%%eax,%%edx \n"
+ "shl $0x10,%%eax \n"
+ "sub $0x10001,%%eax \n"
+ "sbb $0x0,%%edx \n"
+ "sub $0x1,%1 \n"
+ "idiv %1 \n"
+ "mov %0, %%eax \n"
+ : "+a"(num) // %0
+ : "c"(div) // %1
+ : "memory", "cc", "edx");
+ return num;
+}
+
+#endif // defined(__x86_64__) || defined(__i386__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_msa.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_msa.cc
new file mode 100644
index 0000000000..482a521f0d
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_msa.cc
@@ -0,0 +1,949 @@
+/*
+ * Copyright 2016 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "libyuv/scale_row.h"
+
+// This module is for GCC MSA
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#include "libyuv/macros_msa.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#define LOAD_INDEXED_DATA(srcp, indx0, out0) \
+ { \
+ out0[0] = srcp[indx0[0]]; \
+ out0[1] = srcp[indx0[1]]; \
+ out0[2] = srcp[indx0[2]]; \
+ out0[3] = srcp[indx0[3]]; \
+ }
+
+void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ int x;
+ v16u8 src0, src1, dst0;
+ (void)src_stride;
+
+ for (x = 0; x < dst_width; x += 4) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
+ dst0 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
+ ST_UB(dst0, dst_argb);
+ src_argb += 32;
+ dst_argb += 16;
+ }
+}
+
+void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ int x;
+ v16u8 src0, src1, vec0, vec1, dst0;
+ (void)src_stride;
+
+ for (x = 0; x < dst_width; x += 4) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
+ vec0 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0);
+ vec1 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
+ dst0 = (v16u8)__msa_aver_u_b((v16u8)vec0, (v16u8)vec1);
+ ST_UB(dst0, dst_argb);
+ src_argb += 32;
+ dst_argb += 16;
+ }
+}
+
+void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ int x;
+ const uint8_t* s = src_argb;
+ const uint8_t* t = src_argb + src_stride;
+ v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
+ v8u16 reg0, reg1, reg2, reg3;
+ v16i8 shuffler = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15};
+
+ for (x = 0; x < dst_width; x += 4) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)t, 0);
+ src3 = (v16u8)__msa_ld_b((v16i8*)t, 16);
+ vec0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src0, (v16i8)src0);
+ vec1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1);
+ vec2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src2, (v16i8)src2);
+ vec3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src3, (v16i8)src3);
+ reg0 = __msa_hadd_u_h(vec0, vec0);
+ reg1 = __msa_hadd_u_h(vec1, vec1);
+ reg2 = __msa_hadd_u_h(vec2, vec2);
+ reg3 = __msa_hadd_u_h(vec3, vec3);
+ reg0 += reg2;
+ reg1 += reg3;
+ reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 2);
+ reg1 = (v8u16)__msa_srari_h((v8i16)reg1, 2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
+ ST_UB(dst0, dst_argb);
+ s += 32;
+ t += 32;
+ dst_argb += 16;
+ }
+}
+
+void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int32_t src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ int x;
+ int32_t stepx = src_stepx * 4;
+ int32_t data0, data1, data2, data3;
+ (void)src_stride;
+
+ for (x = 0; x < dst_width; x += 4) {
+ data0 = LW(src_argb);
+ data1 = LW(src_argb + stepx);
+ data2 = LW(src_argb + stepx * 2);
+ data3 = LW(src_argb + stepx * 3);
+ SW(data0, dst_argb);
+ SW(data1, dst_argb + 4);
+ SW(data2, dst_argb + 8);
+ SW(data3, dst_argb + 12);
+ src_argb += stepx * 4;
+ dst_argb += 16;
+ }
+}
+
+void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ int x;
+ const uint8_t* nxt_argb = src_argb + src_stride;
+ int32_t stepx = src_stepx * 4;
+ int64_t data0, data1, data2, data3;
+ v16u8 src0 = {0}, src1 = {0}, src2 = {0}, src3 = {0};
+ v16u8 vec0, vec1, vec2, vec3;
+ v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
+ v16u8 dst0;
+
+ for (x = 0; x < dst_width; x += 4) {
+ data0 = LD(src_argb);
+ data1 = LD(src_argb + stepx);
+ data2 = LD(src_argb + stepx * 2);
+ data3 = LD(src_argb + stepx * 3);
+ src0 = (v16u8)__msa_insert_d((v2i64)src0, 0, data0);
+ src0 = (v16u8)__msa_insert_d((v2i64)src0, 1, data1);
+ src1 = (v16u8)__msa_insert_d((v2i64)src1, 0, data2);
+ src1 = (v16u8)__msa_insert_d((v2i64)src1, 1, data3);
+ data0 = LD(nxt_argb);
+ data1 = LD(nxt_argb + stepx);
+ data2 = LD(nxt_argb + stepx * 2);
+ data3 = LD(nxt_argb + stepx * 3);
+ src2 = (v16u8)__msa_insert_d((v2i64)src2, 0, data0);
+ src2 = (v16u8)__msa_insert_d((v2i64)src2, 1, data1);
+ src3 = (v16u8)__msa_insert_d((v2i64)src3, 0, data2);
+ src3 = (v16u8)__msa_insert_d((v2i64)src3, 1, data3);
+ vec0 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
+ vec1 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
+ vec2 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
+ vec3 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
+ reg0 = __msa_hadd_u_h(vec0, vec0);
+ reg1 = __msa_hadd_u_h(vec1, vec1);
+ reg2 = __msa_hadd_u_h(vec2, vec2);
+ reg3 = __msa_hadd_u_h(vec3, vec3);
+ reg4 = (v8u16)__msa_pckev_d((v2i64)reg2, (v2i64)reg0);
+ reg5 = (v8u16)__msa_pckev_d((v2i64)reg3, (v2i64)reg1);
+ reg6 = (v8u16)__msa_pckod_d((v2i64)reg2, (v2i64)reg0);
+ reg7 = (v8u16)__msa_pckod_d((v2i64)reg3, (v2i64)reg1);
+ reg4 += reg6;
+ reg5 += reg7;
+ reg4 = (v8u16)__msa_srari_h((v8i16)reg4, 2);
+ reg5 = (v8u16)__msa_srari_h((v8i16)reg5, 2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
+ ST_UB(dst0, dst_argb);
+ src_argb += stepx * 4;
+ nxt_argb += stepx * 4;
+ dst_argb += 16;
+ }
+}
+
+void ScaleRowDown2_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x;
+ v16u8 src0, src1, src2, src3, dst0, dst1;
+ (void)src_stride;
+
+ for (x = 0; x < dst_width; x += 32) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32);
+ src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48);
+ dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ ST_UB2(dst0, dst1, dst, 16);
+ src_ptr += 64;
+ dst += 32;
+ }
+}
+
+void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x;
+ v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0, dst1;
+ (void)src_stride;
+
+ for (x = 0; x < dst_width; x += 32) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32);
+ src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48);
+ vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ vec2 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+ vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+ vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+ dst0 = __msa_aver_u_b(vec1, vec0);
+ dst1 = __msa_aver_u_b(vec3, vec2);
+ ST_UB2(dst0, dst1, dst, 16);
+ src_ptr += 64;
+ dst += 32;
+ }
+}
+
+void ScaleRowDown2Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1;
+ v8u16 vec0, vec1, vec2, vec3;
+
+ for (x = 0; x < dst_width; x += 32) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 48);
+ src4 = (v16u8)__msa_ld_b((v16i8*)t, 0);
+ src5 = (v16u8)__msa_ld_b((v16i8*)t, 16);
+ src6 = (v16u8)__msa_ld_b((v16i8*)t, 32);
+ src7 = (v16u8)__msa_ld_b((v16i8*)t, 48);
+ vec0 = __msa_hadd_u_h(src0, src0);
+ vec1 = __msa_hadd_u_h(src1, src1);
+ vec2 = __msa_hadd_u_h(src2, src2);
+ vec3 = __msa_hadd_u_h(src3, src3);
+ vec0 += __msa_hadd_u_h(src4, src4);
+ vec1 += __msa_hadd_u_h(src5, src5);
+ vec2 += __msa_hadd_u_h(src6, src6);
+ vec3 += __msa_hadd_u_h(src7, src7);
+ vec0 = (v8u16)__msa_srari_h((v8i16)vec0, 2);
+ vec1 = (v8u16)__msa_srari_h((v8i16)vec1, 2);
+ vec2 = (v8u16)__msa_srari_h((v8i16)vec2, 2);
+ vec3 = (v8u16)__msa_srari_h((v8i16)vec3, 2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+ ST_UB2(dst0, dst1, dst, 16);
+ s += 64;
+ t += 64;
+ dst += 32;
+ }
+}
+
+void ScaleRowDown4_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x;
+ v16u8 src0, src1, src2, src3, vec0, vec1, dst0;
+ (void)src_stride;
+
+ for (x = 0; x < dst_width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32);
+ src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48);
+ vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+ vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+ dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst);
+ src_ptr += 64;
+ dst += 16;
+ }
+}
+
+void ScaleRowDown4Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t0 = s + src_stride;
+ const uint8_t* t1 = s + src_stride * 2;
+ const uint8_t* t2 = s + src_stride * 3;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0;
+ v8u16 vec0, vec1, vec2, vec3;
+ v4u32 reg0, reg1, reg2, reg3;
+
+ for (x = 0; x < dst_width; x += 16) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 48);
+ src4 = (v16u8)__msa_ld_b((v16i8*)t0, 0);
+ src5 = (v16u8)__msa_ld_b((v16i8*)t0, 16);
+ src6 = (v16u8)__msa_ld_b((v16i8*)t0, 32);
+ src7 = (v16u8)__msa_ld_b((v16i8*)t0, 48);
+ vec0 = __msa_hadd_u_h(src0, src0);
+ vec1 = __msa_hadd_u_h(src1, src1);
+ vec2 = __msa_hadd_u_h(src2, src2);
+ vec3 = __msa_hadd_u_h(src3, src3);
+ vec0 += __msa_hadd_u_h(src4, src4);
+ vec1 += __msa_hadd_u_h(src5, src5);
+ vec2 += __msa_hadd_u_h(src6, src6);
+ vec3 += __msa_hadd_u_h(src7, src7);
+ src0 = (v16u8)__msa_ld_b((v16i8*)t1, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)t1, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)t1, 32);
+ src3 = (v16u8)__msa_ld_b((v16i8*)t1, 48);
+ src4 = (v16u8)__msa_ld_b((v16i8*)t2, 0);
+ src5 = (v16u8)__msa_ld_b((v16i8*)t2, 16);
+ src6 = (v16u8)__msa_ld_b((v16i8*)t2, 32);
+ src7 = (v16u8)__msa_ld_b((v16i8*)t2, 48);
+ vec0 += __msa_hadd_u_h(src0, src0);
+ vec1 += __msa_hadd_u_h(src1, src1);
+ vec2 += __msa_hadd_u_h(src2, src2);
+ vec3 += __msa_hadd_u_h(src3, src3);
+ vec0 += __msa_hadd_u_h(src4, src4);
+ vec1 += __msa_hadd_u_h(src5, src5);
+ vec2 += __msa_hadd_u_h(src6, src6);
+ vec3 += __msa_hadd_u_h(src7, src7);
+ reg0 = __msa_hadd_u_w(vec0, vec0);
+ reg1 = __msa_hadd_u_w(vec1, vec1);
+ reg2 = __msa_hadd_u_w(vec2, vec2);
+ reg3 = __msa_hadd_u_w(vec3, vec3);
+ reg0 = (v4u32)__msa_srari_w((v4i32)reg0, 4);
+ reg1 = (v4u32)__msa_srari_w((v4i32)reg1, 4);
+ reg2 = (v4u32)__msa_srari_w((v4i32)reg2, 4);
+ reg3 = (v4u32)__msa_srari_w((v4i32)reg3, 4);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+ ST_UB(dst0, dst);
+ s += 64;
+ t0 += 64;
+ t1 += 64;
+ t2 += 64;
+ dst += 16;
+ }
+}
+
+void ScaleRowDown38_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x, width;
+ uint64_t dst0;
+ uint32_t dst1;
+ v16u8 src0, src1, vec0;
+ v16i8 mask = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0};
+ (void)src_stride;
+
+ assert(dst_width % 3 == 0);
+ width = dst_width / 3;
+
+ for (x = 0; x < width; x += 4) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16);
+ vec0 = (v16u8)__msa_vshf_b(mask, (v16i8)src1, (v16i8)src0);
+ dst0 = __msa_copy_u_d((v2i64)vec0, 0);
+ dst1 = __msa_copy_u_w((v4i32)vec0, 2);
+ SD(dst0, dst);
+ SW(dst1, dst + 8);
+ src_ptr += 32;
+ dst += 12;
+ }
+}
+
+void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ int x, width;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ uint64_t dst0;
+ uint32_t dst1;
+ v16u8 src0, src1, src2, src3, out;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v4u32 tmp0, tmp1, tmp2, tmp3, tmp4;
+ v8i16 zero = {0};
+ v8i16 mask = {0, 1, 2, 8, 3, 4, 5, 9};
+ v16i8 dst_mask = {0, 2, 16, 4, 6, 18, 8, 10, 20, 12, 14, 22, 0, 0, 0, 0};
+ v4u32 const_0x2AAA = (v4u32)__msa_fill_w(0x2AAA);
+ v4u32 const_0x4000 = (v4u32)__msa_fill_w(0x4000);
+
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ width = dst_width / 3;
+
+ for (x = 0; x < width; x += 4) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)t, 0);
+ src3 = (v16u8)__msa_ld_b((v16i8*)t, 16);
+ vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
+ vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
+ vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
+ vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
+ vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0);
+ vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1);
+ vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2);
+ vec3 = __msa_hadd_u_h((v16u8)vec3, (v16u8)vec3);
+ vec4 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec0);
+ vec5 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec1);
+ vec6 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec2);
+ vec7 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec3);
+ vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0);
+ vec1 = (v8u16)__msa_pckod_w((v4i32)vec3, (v4i32)vec2);
+ vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0);
+ tmp0 = __msa_hadd_u_w(vec4, vec4);
+ tmp1 = __msa_hadd_u_w(vec5, vec5);
+ tmp2 = __msa_hadd_u_w(vec6, vec6);
+ tmp3 = __msa_hadd_u_w(vec7, vec7);
+ tmp4 = __msa_hadd_u_w(vec0, vec0);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2);
+ tmp0 = __msa_hadd_u_w(vec0, vec0);
+ tmp1 = __msa_hadd_u_w(vec1, vec1);
+ tmp0 *= const_0x2AAA;
+ tmp1 *= const_0x2AAA;
+ tmp4 *= const_0x4000;
+ tmp0 = (v4u32)__msa_srai_w((v4i32)tmp0, 16);
+ tmp1 = (v4u32)__msa_srai_w((v4i32)tmp1, 16);
+ tmp4 = (v4u32)__msa_srai_w((v4i32)tmp4, 16);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)tmp4, (v8i16)tmp4);
+ out = (v16u8)__msa_vshf_b(dst_mask, (v16i8)vec1, (v16i8)vec0);
+ dst0 = __msa_copy_u_d((v2i64)out, 0);
+ dst1 = __msa_copy_u_w((v4i32)out, 2);
+ SD(dst0, dst_ptr);
+ SW(dst1, dst_ptr + 8);
+ s += 32;
+ t += 32;
+ dst_ptr += 12;
+ }
+}
+
+void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ int x, width;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t0 = s + src_stride;
+ const uint8_t* t1 = s + src_stride * 2;
+ uint64_t dst0;
+ uint32_t dst1;
+ v16u8 src0, src1, src2, src3, src4, src5, out;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v4u32 tmp0, tmp1, tmp2, tmp3, tmp4;
+ v8u16 zero = {0};
+ v8i16 mask = {0, 1, 2, 8, 3, 4, 5, 9};
+ v16i8 dst_mask = {0, 2, 16, 4, 6, 18, 8, 10, 20, 12, 14, 22, 0, 0, 0, 0};
+ v4u32 const_0x1C71 = (v4u32)__msa_fill_w(0x1C71);
+ v4u32 const_0x2AAA = (v4u32)__msa_fill_w(0x2AAA);
+
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ width = dst_width / 3;
+
+ for (x = 0; x < width; x += 4) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)t0, 0);
+ src3 = (v16u8)__msa_ld_b((v16i8*)t0, 16);
+ src4 = (v16u8)__msa_ld_b((v16i8*)t1, 0);
+ src5 = (v16u8)__msa_ld_b((v16i8*)t1, 16);
+ vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
+ vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
+ vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
+ vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
+ vec4 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src4);
+ vec5 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src4);
+ vec6 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src5);
+ vec7 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src5);
+ vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0);
+ vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1);
+ vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2);
+ vec3 = __msa_hadd_u_h((v16u8)vec3, (v16u8)vec3);
+ vec0 += __msa_hadd_u_h((v16u8)vec4, (v16u8)vec4);
+ vec1 += __msa_hadd_u_h((v16u8)vec5, (v16u8)vec5);
+ vec2 += __msa_hadd_u_h((v16u8)vec6, (v16u8)vec6);
+ vec3 += __msa_hadd_u_h((v16u8)vec7, (v16u8)vec7);
+ vec4 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec0);
+ vec5 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec1);
+ vec6 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec2);
+ vec7 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec3);
+ vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0);
+ vec1 = (v8u16)__msa_pckod_w((v4i32)vec3, (v4i32)vec2);
+ vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0);
+ tmp0 = __msa_hadd_u_w(vec4, vec4);
+ tmp1 = __msa_hadd_u_w(vec5, vec5);
+ tmp2 = __msa_hadd_u_w(vec6, vec6);
+ tmp3 = __msa_hadd_u_w(vec7, vec7);
+ tmp4 = __msa_hadd_u_w(vec0, vec0);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2);
+ tmp0 = __msa_hadd_u_w(vec0, vec0);
+ tmp1 = __msa_hadd_u_w(vec1, vec1);
+ tmp0 *= const_0x1C71;
+ tmp1 *= const_0x1C71;
+ tmp4 *= const_0x2AAA;
+ tmp0 = (v4u32)__msa_srai_w((v4i32)tmp0, 16);
+ tmp1 = (v4u32)__msa_srai_w((v4i32)tmp1, 16);
+ tmp4 = (v4u32)__msa_srai_w((v4i32)tmp4, 16);
+ vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
+ vec1 = (v8u16)__msa_pckev_h((v8i16)tmp4, (v8i16)tmp4);
+ out = (v16u8)__msa_vshf_b(dst_mask, (v16i8)vec1, (v16i8)vec0);
+ dst0 = __msa_copy_u_d((v2i64)out, 0);
+ dst1 = __msa_copy_u_w((v4i32)out, 2);
+ SD(dst0, dst_ptr);
+ SW(dst1, dst_ptr + 8);
+ s += 32;
+ t0 += 32;
+ t1 += 32;
+ dst_ptr += 12;
+ }
+}
+
+void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
+ int x;
+ v16u8 src0;
+ v8u16 dst0, dst1;
+ v16i8 zero = {0};
+
+ assert(src_width > 0);
+
+ for (x = 0; x < src_width; x += 16) {
+ src0 = LD_UB(src_ptr);
+ dst0 = (v8u16)__msa_ld_h((v8i16*)dst_ptr, 0);
+ dst1 = (v8u16)__msa_ld_h((v8i16*)dst_ptr, 16);
+ dst0 += (v8u16)__msa_ilvr_b(zero, (v16i8)src0);
+ dst1 += (v8u16)__msa_ilvl_b(zero, (v16i8)src0);
+ ST_UH2(dst0, dst1, dst_ptr, 8);
+ src_ptr += 16;
+ dst_ptr += 16;
+ }
+}
+
+void ScaleFilterCols_MSA(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ int j;
+ v4i32 vec_x = __msa_fill_w(x);
+ v4i32 vec_dx = __msa_fill_w(dx);
+ v4i32 vec_const = {0, 1, 2, 3};
+ v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+ v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ v8u16 reg0, reg1;
+ v16u8 dst0;
+ v4i32 const_0xFFFF = __msa_fill_w(0xFFFF);
+ v4i32 const_0x40 = __msa_fill_w(0x40);
+
+ vec0 = vec_dx * vec_const;
+ vec1 = vec_dx * 4;
+ vec_x += vec0;
+
+ for (j = 0; j < dst_width - 1; j += 16) {
+ vec2 = vec_x >> 16;
+ vec6 = vec_x & const_0xFFFF;
+ vec_x += vec1;
+ vec3 = vec_x >> 16;
+ vec7 = vec_x & const_0xFFFF;
+ vec_x += vec1;
+ vec4 = vec_x >> 16;
+ vec8 = vec_x & const_0xFFFF;
+ vec_x += vec1;
+ vec5 = vec_x >> 16;
+ vec9 = vec_x & const_0xFFFF;
+ vec_x += vec1;
+ vec6 >>= 9;
+ vec7 >>= 9;
+ vec8 >>= 9;
+ vec9 >>= 9;
+ LOAD_INDEXED_DATA(src_ptr, vec2, tmp0);
+ LOAD_INDEXED_DATA(src_ptr, vec3, tmp1);
+ LOAD_INDEXED_DATA(src_ptr, vec4, tmp2);
+ LOAD_INDEXED_DATA(src_ptr, vec5, tmp3);
+ vec2 += 1;
+ vec3 += 1;
+ vec4 += 1;
+ vec5 += 1;
+ LOAD_INDEXED_DATA(src_ptr, vec2, tmp4);
+ LOAD_INDEXED_DATA(src_ptr, vec3, tmp5);
+ LOAD_INDEXED_DATA(src_ptr, vec4, tmp6);
+ LOAD_INDEXED_DATA(src_ptr, vec5, tmp7);
+ tmp4 -= tmp0;
+ tmp5 -= tmp1;
+ tmp6 -= tmp2;
+ tmp7 -= tmp3;
+ tmp4 *= vec6;
+ tmp5 *= vec7;
+ tmp6 *= vec8;
+ tmp7 *= vec9;
+ tmp4 += const_0x40;
+ tmp5 += const_0x40;
+ tmp6 += const_0x40;
+ tmp7 += const_0x40;
+ tmp4 >>= 7;
+ tmp5 >>= 7;
+ tmp6 >>= 7;
+ tmp7 >>= 7;
+ tmp0 += tmp4;
+ tmp1 += tmp5;
+ tmp2 += tmp6;
+ tmp3 += tmp7;
+ reg0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0);
+ reg1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
+ __msa_st_b(dst0, dst_ptr, 0);
+ dst_ptr += 16;
+ }
+}
+
+void ScaleARGBCols_MSA(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
+ int j;
+ v4i32 x_vec = __msa_fill_w(x);
+ v4i32 dx_vec = __msa_fill_w(dx);
+ v4i32 const_vec = {0, 1, 2, 3};
+ v4i32 vec0, vec1, vec2;
+ v4i32 dst0;
+
+ vec0 = dx_vec * const_vec;
+ vec1 = dx_vec * 4;
+ x_vec += vec0;
+
+ for (j = 0; j < dst_width; j += 4) {
+ vec2 = x_vec >> 16;
+ x_vec += vec1;
+ LOAD_INDEXED_DATA(src, vec2, dst0);
+ __msa_st_w(dst0, dst, 0);
+ dst += 4;
+ }
+}
+
+void ScaleARGBFilterCols_MSA(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ int j;
+ v4u32 src0, src1, src2, src3;
+ v4u32 vec0, vec1, vec2, vec3;
+ v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
+ v16u8 mult0, mult1, mult2, mult3;
+ v8u16 tmp0, tmp1, tmp2, tmp3;
+ v16u8 dst0, dst1;
+ v4u32 vec_x = (v4u32)__msa_fill_w(x);
+ v4u32 vec_dx = (v4u32)__msa_fill_w(dx);
+ v4u32 vec_const = {0, 1, 2, 3};
+ v16u8 const_0x7f = (v16u8)__msa_fill_b(0x7f);
+
+ vec0 = vec_dx * vec_const;
+ vec1 = vec_dx * 4;
+ vec_x += vec0;
+
+ for (j = 0; j < dst_width - 1; j += 8) {
+ vec2 = vec_x >> 16;
+ reg0 = (v16u8)(vec_x >> 9);
+ vec_x += vec1;
+ vec3 = vec_x >> 16;
+ reg1 = (v16u8)(vec_x >> 9);
+ vec_x += vec1;
+ reg0 = reg0 & const_0x7f;
+ reg1 = reg1 & const_0x7f;
+ reg0 = (v16u8)__msa_shf_b((v16i8)reg0, 0);
+ reg1 = (v16u8)__msa_shf_b((v16i8)reg1, 0);
+ reg2 = reg0 ^ const_0x7f;
+ reg3 = reg1 ^ const_0x7f;
+ mult0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)reg2);
+ mult1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)reg2);
+ mult2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)reg3);
+ mult3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)reg3);
+ LOAD_INDEXED_DATA(src, vec2, src0);
+ LOAD_INDEXED_DATA(src, vec3, src1);
+ vec2 += 1;
+ vec3 += 1;
+ LOAD_INDEXED_DATA(src, vec2, src2);
+ LOAD_INDEXED_DATA(src, vec3, src3);
+ reg4 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
+ reg5 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
+ reg6 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
+ reg7 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
+ tmp0 = __msa_dotp_u_h(reg4, mult0);
+ tmp1 = __msa_dotp_u_h(reg5, mult1);
+ tmp2 = __msa_dotp_u_h(reg6, mult2);
+ tmp3 = __msa_dotp_u_h(reg7, mult3);
+ tmp0 >>= 7;
+ tmp1 >>= 7;
+ tmp2 >>= 7;
+ tmp3 >>= 7;
+ dst0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
+ __msa_st_b(dst0, dst_argb, 0);
+ __msa_st_b(dst1, dst_argb, 16);
+ dst_argb += 32;
+ }
+}
+
+void ScaleRowDown34_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ int x;
+ (void)src_stride;
+ v16u8 src0, src1, src2, src3;
+ v16u8 vec0, vec1, vec2;
+ v16i8 mask0 = {0, 1, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20};
+ v16i8 mask1 = {5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25};
+ v16i8 mask2 = {11, 12, 13, 15, 16, 17, 19, 20,
+ 21, 23, 24, 25, 27, 28, 29, 31};
+
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+
+ for (x = 0; x < dst_width; x += 48) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32);
+ src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48);
+ vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0);
+ vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src2, (v16i8)src1);
+ vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src2);
+ __msa_st_b((v16i8)vec0, dst, 0);
+ __msa_st_b((v16i8)vec1, dst, 16);
+ __msa_st_b((v16i8)vec2, dst, 32);
+ src_ptr += 64;
+ dst += 48;
+ }
+}
+
+void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* d,
+ int dst_width) {
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ int x;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5;
+ v16u8 vec6, vec7, vec8, vec9, vec10, vec11;
+ v8i16 reg0, reg1, reg2, reg3, reg4, reg5;
+ v8i16 reg6, reg7, reg8, reg9, reg10, reg11;
+ v16u8 const0 = {3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1};
+ v16u8 const1 = {1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1};
+ v16u8 const2 = {1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3};
+ v16i8 mask0 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10};
+ v16i8 mask1 = {10, 11, 12, 13, 13, 14, 14, 15,
+ 16, 17, 17, 18, 18, 19, 20, 21};
+ v16i8 mask2 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15};
+ v8i16 shft0 = {2, 1, 2, 2, 1, 2, 2, 1};
+ v8i16 shft1 = {2, 2, 1, 2, 2, 1, 2, 2};
+ v8i16 shft2 = {1, 2, 2, 1, 2, 2, 1, 2};
+
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+
+ for (x = 0; x < dst_width; x += 48) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 48);
+ src4 = (v16u8)__msa_ld_b((v16i8*)t, 0);
+ src5 = (v16u8)__msa_ld_b((v16i8*)t, 16);
+ src6 = (v16u8)__msa_ld_b((v16i8*)t, 32);
+ src7 = (v16u8)__msa_ld_b((v16i8*)t, 48);
+ vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src0, (v16i8)src0);
+ vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
+ vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src1, (v16i8)src1);
+ vec3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src2, (v16i8)src2);
+ vec4 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2);
+ vec5 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src3);
+ vec6 = (v16u8)__msa_vshf_b(mask0, (v16i8)src4, (v16i8)src4);
+ vec7 = (v16u8)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4);
+ vec8 = (v16u8)__msa_vshf_b(mask2, (v16i8)src5, (v16i8)src5);
+ vec9 = (v16u8)__msa_vshf_b(mask0, (v16i8)src6, (v16i8)src6);
+ vec10 = (v16u8)__msa_vshf_b(mask1, (v16i8)src7, (v16i8)src6);
+ vec11 = (v16u8)__msa_vshf_b(mask2, (v16i8)src7, (v16i8)src7);
+ reg0 = (v8i16)__msa_dotp_u_h(vec0, const0);
+ reg1 = (v8i16)__msa_dotp_u_h(vec1, const1);
+ reg2 = (v8i16)__msa_dotp_u_h(vec2, const2);
+ reg3 = (v8i16)__msa_dotp_u_h(vec3, const0);
+ reg4 = (v8i16)__msa_dotp_u_h(vec4, const1);
+ reg5 = (v8i16)__msa_dotp_u_h(vec5, const2);
+ reg6 = (v8i16)__msa_dotp_u_h(vec6, const0);
+ reg7 = (v8i16)__msa_dotp_u_h(vec7, const1);
+ reg8 = (v8i16)__msa_dotp_u_h(vec8, const2);
+ reg9 = (v8i16)__msa_dotp_u_h(vec9, const0);
+ reg10 = (v8i16)__msa_dotp_u_h(vec10, const1);
+ reg11 = (v8i16)__msa_dotp_u_h(vec11, const2);
+ reg0 = __msa_srar_h(reg0, shft0);
+ reg1 = __msa_srar_h(reg1, shft1);
+ reg2 = __msa_srar_h(reg2, shft2);
+ reg3 = __msa_srar_h(reg3, shft0);
+ reg4 = __msa_srar_h(reg4, shft1);
+ reg5 = __msa_srar_h(reg5, shft2);
+ reg6 = __msa_srar_h(reg6, shft0);
+ reg7 = __msa_srar_h(reg7, shft1);
+ reg8 = __msa_srar_h(reg8, shft2);
+ reg9 = __msa_srar_h(reg9, shft0);
+ reg10 = __msa_srar_h(reg10, shft1);
+ reg11 = __msa_srar_h(reg11, shft2);
+ reg0 = reg0 * 3 + reg6;
+ reg1 = reg1 * 3 + reg7;
+ reg2 = reg2 * 3 + reg8;
+ reg3 = reg3 * 3 + reg9;
+ reg4 = reg4 * 3 + reg10;
+ reg5 = reg5 * 3 + reg11;
+ reg0 = __msa_srari_h(reg0, 2);
+ reg1 = __msa_srari_h(reg1, 2);
+ reg2 = __msa_srari_h(reg2, 2);
+ reg3 = __msa_srari_h(reg3, 2);
+ reg4 = __msa_srari_h(reg4, 2);
+ reg5 = __msa_srari_h(reg5, 2);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2);
+ dst2 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
+ __msa_st_b((v16i8)dst0, d, 0);
+ __msa_st_b((v16i8)dst1, d, 16);
+ __msa_st_b((v16i8)dst2, d, 32);
+ s += 64;
+ t += 64;
+ d += 48;
+ }
+}
+
+void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* d,
+ int dst_width) {
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ int x;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5;
+ v16u8 vec6, vec7, vec8, vec9, vec10, vec11;
+ v8i16 reg0, reg1, reg2, reg3, reg4, reg5;
+ v8i16 reg6, reg7, reg8, reg9, reg10, reg11;
+ v16u8 const0 = {3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1};
+ v16u8 const1 = {1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1};
+ v16u8 const2 = {1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3};
+ v16i8 mask0 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10};
+ v16i8 mask1 = {10, 11, 12, 13, 13, 14, 14, 15,
+ 16, 17, 17, 18, 18, 19, 20, 21};
+ v16i8 mask2 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15};
+ v8i16 shft0 = {2, 1, 2, 2, 1, 2, 2, 1};
+ v8i16 shft1 = {2, 2, 1, 2, 2, 1, 2, 2};
+ v8i16 shft2 = {1, 2, 2, 1, 2, 2, 1, 2};
+
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+
+ for (x = 0; x < dst_width; x += 48) {
+ src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
+ src3 = (v16u8)__msa_ld_b((v16i8*)s, 48);
+ src4 = (v16u8)__msa_ld_b((v16i8*)t, 0);
+ src5 = (v16u8)__msa_ld_b((v16i8*)t, 16);
+ src6 = (v16u8)__msa_ld_b((v16i8*)t, 32);
+ src7 = (v16u8)__msa_ld_b((v16i8*)t, 48);
+ vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src0, (v16i8)src0);
+ vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
+ vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src1, (v16i8)src1);
+ vec3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src2, (v16i8)src2);
+ vec4 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2);
+ vec5 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src3);
+ vec6 = (v16u8)__msa_vshf_b(mask0, (v16i8)src4, (v16i8)src4);
+ vec7 = (v16u8)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4);
+ vec8 = (v16u8)__msa_vshf_b(mask2, (v16i8)src5, (v16i8)src5);
+ vec9 = (v16u8)__msa_vshf_b(mask0, (v16i8)src6, (v16i8)src6);
+ vec10 = (v16u8)__msa_vshf_b(mask1, (v16i8)src7, (v16i8)src6);
+ vec11 = (v16u8)__msa_vshf_b(mask2, (v16i8)src7, (v16i8)src7);
+ reg0 = (v8i16)__msa_dotp_u_h(vec0, const0);
+ reg1 = (v8i16)__msa_dotp_u_h(vec1, const1);
+ reg2 = (v8i16)__msa_dotp_u_h(vec2, const2);
+ reg3 = (v8i16)__msa_dotp_u_h(vec3, const0);
+ reg4 = (v8i16)__msa_dotp_u_h(vec4, const1);
+ reg5 = (v8i16)__msa_dotp_u_h(vec5, const2);
+ reg6 = (v8i16)__msa_dotp_u_h(vec6, const0);
+ reg7 = (v8i16)__msa_dotp_u_h(vec7, const1);
+ reg8 = (v8i16)__msa_dotp_u_h(vec8, const2);
+ reg9 = (v8i16)__msa_dotp_u_h(vec9, const0);
+ reg10 = (v8i16)__msa_dotp_u_h(vec10, const1);
+ reg11 = (v8i16)__msa_dotp_u_h(vec11, const2);
+ reg0 = __msa_srar_h(reg0, shft0);
+ reg1 = __msa_srar_h(reg1, shft1);
+ reg2 = __msa_srar_h(reg2, shft2);
+ reg3 = __msa_srar_h(reg3, shft0);
+ reg4 = __msa_srar_h(reg4, shft1);
+ reg5 = __msa_srar_h(reg5, shft2);
+ reg6 = __msa_srar_h(reg6, shft0);
+ reg7 = __msa_srar_h(reg7, shft1);
+ reg8 = __msa_srar_h(reg8, shft2);
+ reg9 = __msa_srar_h(reg9, shft0);
+ reg10 = __msa_srar_h(reg10, shft1);
+ reg11 = __msa_srar_h(reg11, shft2);
+ reg0 += reg6;
+ reg1 += reg7;
+ reg2 += reg8;
+ reg3 += reg9;
+ reg4 += reg10;
+ reg5 += reg11;
+ reg0 = __msa_srari_h(reg0, 1);
+ reg1 = __msa_srari_h(reg1, 1);
+ reg2 = __msa_srari_h(reg2, 1);
+ reg3 = __msa_srari_h(reg3, 1);
+ reg4 = __msa_srari_h(reg4, 1);
+ reg5 = __msa_srari_h(reg5, 1);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
+ dst1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2);
+ dst2 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
+ __msa_st_b((v16i8)dst0, d, 0);
+ __msa_st_b((v16i8)dst1, d, 16);
+ __msa_st_b((v16i8)dst2, d, 32);
+ s += 64;
+ t += 64;
+ d += 48;
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_neon.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_neon.cc
new file mode 100644
index 0000000000..459a2995df
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_neon.cc
@@ -0,0 +1,970 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC Neon.
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
+ !defined(__aarch64__)
+
+// NEON downscalers with interpolation.
+// Provided by Fritz Koenig
+
+// Read 32x1 throw away even pixels, and write 16x1.
+void ScaleRowDown2_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ // load even pixels into q0, odd into q1
+ "vld2.8 {q0, q1}, [%0]! \n"
+ "subs %2, %2, #16 \n" // 16 processed per loop
+ "vst1.8 {q1}, [%1]! \n" // store odd pixels
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "q0", "q1" // Clobber List
+ );
+}
+
+// Read 32x1 average down and write 16x1.
+void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "vld2.8 {q0, q1}, [%0]! \n" // load 32 pixels
+ "subs %2, %2, #16 \n" // 16 processed per loop
+ "vrhadd.u8 q0, q0, q1 \n" // rounding half add
+ "vst1.8 {q0}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "q0", "q1" // Clobber List
+ );
+}
+
+// Read 32x2 average down and write 16x1.
+void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ asm volatile(
+ // change the stride to row 2 pointer
+ "add %1, %0 \n"
+ "1: \n"
+ "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
+ "vld1.8 {q2, q3}, [%1]! \n" // load row 2 and post inc
+ "subs %3, %3, #16 \n" // 16 processed per loop
+ "vpaddl.u8 q0, q0 \n" // row 1 add adjacent
+ "vpaddl.u8 q1, q1 \n"
+ "vpadal.u8 q0, q2 \n" // row 2 add adjacent +
+ // row1
+ "vpadal.u8 q1, q3 \n"
+ "vrshrn.u16 d0, q0, #2 \n" // downshift, round and
+ // pack
+ "vrshrn.u16 d1, q1, #2 \n"
+ "vst1.8 {q0}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst), // %2
+ "+r"(dst_width) // %3
+ :
+ : "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+
+void ScaleRowDown4_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ "vst1.8 {d2}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ :
+ : "q0", "q1", "memory", "cc");
+}
+
+void ScaleRowDown4Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
+ const uint8_t* src_ptr2 = src_ptr + src_stride * 2;
+ const uint8_t* src_ptr3 = src_ptr + src_stride * 3;
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q0}, [%0]! \n" // load up 16x4
+ "vld1.8 {q1}, [%3]! \n"
+ "vld1.8 {q2}, [%4]! \n"
+ "vld1.8 {q3}, [%5]! \n"
+ "subs %2, %2, #4 \n"
+ "vpaddl.u8 q0, q0 \n"
+ "vpadal.u8 q0, q1 \n"
+ "vpadal.u8 q0, q2 \n"
+ "vpadal.u8 q0, q3 \n"
+ "vpaddl.u16 q0, q0 \n"
+ "vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding
+ "vmovn.u16 d0, q0 \n"
+ "vst1.32 {d0[0]}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_ptr1), // %3
+ "+r"(src_ptr2), // %4
+ "+r"(src_ptr3) // %5
+ :
+ : "q0", "q1", "q2", "q3", "memory", "cc");
+}
+
+// Down scale from 4 to 3 pixels. Use the neon multilane read/write
+// to load up the every 4th pixel into a 4 different registers.
+// Point samples 32 pixels to 24 pixels.
+void ScaleRowDown34_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
+ "subs %2, %2, #24 \n"
+ "vmov d2, d3 \n" // order d0, d1, d2
+ "vst3.8 {d0, d1, d2}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ :
+ : "d0", "d1", "d2", "d3", "memory", "cc");
+}
+
+void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "vmov.u8 d24, #3 \n"
+ "add %3, %0 \n"
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
+ "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
+ "subs %2, %2, #24 \n"
+
+ // filter src line 0 with src line 1
+ // expand chars to shorts to allow for room
+ // when adding lines together
+ "vmovl.u8 q8, d4 \n"
+ "vmovl.u8 q9, d5 \n"
+ "vmovl.u8 q10, d6 \n"
+ "vmovl.u8 q11, d7 \n"
+
+ // 3 * line_0 + line_1
+ "vmlal.u8 q8, d0, d24 \n"
+ "vmlal.u8 q9, d1, d24 \n"
+ "vmlal.u8 q10, d2, d24 \n"
+ "vmlal.u8 q11, d3, d24 \n"
+
+ // (3 * line_0 + line_1) >> 2
+ "vqrshrn.u16 d0, q8, #2 \n"
+ "vqrshrn.u16 d1, q9, #2 \n"
+ "vqrshrn.u16 d2, q10, #2 \n"
+ "vqrshrn.u16 d3, q11, #2 \n"
+
+ // a0 = (src[0] * 3 + s[1] * 1) >> 2
+ "vmovl.u8 q8, d1 \n"
+ "vmlal.u8 q8, d0, d24 \n"
+ "vqrshrn.u16 d0, q8, #2 \n"
+
+ // a1 = (src[1] * 1 + s[2] * 1) >> 1
+ "vrhadd.u8 d1, d1, d2 \n"
+
+ // a2 = (src[2] * 1 + s[3] * 3) >> 2
+ "vmovl.u8 q8, d2 \n"
+ "vmlal.u8 q8, d3, d24 \n"
+ "vqrshrn.u16 d2, q8, #2 \n"
+
+ "vst3.8 {d0, d1, d2}, [%1]! \n"
+
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_stride) // %3
+ :
+ : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory",
+ "cc");
+}
+
+void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "vmov.u8 d24, #3 \n"
+ "add %3, %0 \n"
+ "1: \n"
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
+ "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
+ "subs %2, %2, #24 \n"
+ // average src line 0 with src line 1
+ "vrhadd.u8 q0, q0, q2 \n"
+ "vrhadd.u8 q1, q1, q3 \n"
+
+ // a0 = (src[0] * 3 + s[1] * 1) >> 2
+ "vmovl.u8 q3, d1 \n"
+ "vmlal.u8 q3, d0, d24 \n"
+ "vqrshrn.u16 d0, q3, #2 \n"
+
+ // a1 = (src[1] * 1 + s[2] * 1) >> 1
+ "vrhadd.u8 d1, d1, d2 \n"
+
+ // a2 = (src[2] * 1 + s[3] * 3) >> 2
+ "vmovl.u8 q3, d2 \n"
+ "vmlal.u8 q3, d3, d24 \n"
+ "vqrshrn.u16 d2, q3, #2 \n"
+
+ "vst3.8 {d0, d1, d2}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_stride) // %3
+ :
+ : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc");
+}
+
+#define HAS_SCALEROWDOWN38_NEON
+static const uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19,
+ 22, 24, 27, 30, 0, 0, 0, 0};
+static const uvec8 kShuf38_2 = {0, 8, 16, 2, 10, 17, 4, 12,
+ 18, 6, 14, 19, 0, 0, 0, 0};
+static const vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12,
+ 65536 / 12, 65536 / 12, 65536 / 12,
+ 65536 / 12, 65536 / 12};
+static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18,
+ 65536 / 18, 65536 / 18, 65536 / 18,
+ 65536 / 18, 65536 / 18};
+
+// 32 -> 12
+void ScaleRowDown38_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "vld1.8 {q3}, [%3] \n"
+ "1: \n"
+ "vld1.8 {d0, d1, d2, d3}, [%0]! \n"
+ "subs %2, %2, #12 \n"
+ "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n"
+ "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n"
+ "vst1.8 {d4}, [%1]! \n"
+ "vst1.32 {d5[0]}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"(&kShuf38) // %3
+ : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc");
+}
+
+// 32x3 -> 12x1
+void OMITFP ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ const uint8_t* src_ptr1 = src_ptr + src_stride * 2;
+
+ asm volatile(
+ "vld1.16 {q13}, [%5] \n"
+ "vld1.8 {q14}, [%6] \n"
+ "vld1.8 {q15}, [%7] \n"
+ "add %3, %0 \n"
+ "1: \n"
+
+ // d0 = 00 40 01 41 02 42 03 43
+ // d1 = 10 50 11 51 12 52 13 53
+ // d2 = 20 60 21 61 22 62 23 63
+ // d3 = 30 70 31 71 32 72 33 73
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n"
+ "vld4.8 {d4, d5, d6, d7}, [%3]! \n"
+ "vld4.8 {d16, d17, d18, d19}, [%4]! \n"
+ "subs %2, %2, #12 \n"
+
+ // Shuffle the input data around to get align the data
+ // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+ // d0 = 00 10 01 11 02 12 03 13
+ // d1 = 40 50 41 51 42 52 43 53
+ "vtrn.u8 d0, d1 \n"
+ "vtrn.u8 d4, d5 \n"
+ "vtrn.u8 d16, d17 \n"
+
+ // d2 = 20 30 21 31 22 32 23 33
+ // d3 = 60 70 61 71 62 72 63 73
+ "vtrn.u8 d2, d3 \n"
+ "vtrn.u8 d6, d7 \n"
+ "vtrn.u8 d18, d19 \n"
+
+ // d0 = 00+10 01+11 02+12 03+13
+ // d2 = 40+50 41+51 42+52 43+53
+ "vpaddl.u8 q0, q0 \n"
+ "vpaddl.u8 q2, q2 \n"
+ "vpaddl.u8 q8, q8 \n"
+
+ // d3 = 60+70 61+71 62+72 63+73
+ "vpaddl.u8 d3, d3 \n"
+ "vpaddl.u8 d7, d7 \n"
+ "vpaddl.u8 d19, d19 \n"
+
+ // combine source lines
+ "vadd.u16 q0, q2 \n"
+ "vadd.u16 q0, q8 \n"
+ "vadd.u16 d4, d3, d7 \n"
+ "vadd.u16 d4, d19 \n"
+
+ // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
+ // + s[6 + st * 1] + s[7 + st * 1]
+ // + s[6 + st * 2] + s[7 + st * 2]) / 6
+ "vqrdmulh.s16 q2, q2, q13 \n"
+ "vmovn.u16 d4, q2 \n"
+
+ // Shuffle 2,3 reg around so that 2 can be added to the
+ // 0,1 reg and 3 can be added to the 4,5 reg. This
+ // requires expanding from u8 to u16 as the 0,1 and 4,5
+ // registers are already expanded. Then do transposes
+ // to get aligned.
+ // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+ "vmovl.u8 q1, d2 \n"
+ "vmovl.u8 q3, d6 \n"
+ "vmovl.u8 q9, d18 \n"
+
+ // combine source lines
+ "vadd.u16 q1, q3 \n"
+ "vadd.u16 q1, q9 \n"
+
+ // d4 = xx 20 xx 30 xx 22 xx 32
+ // d5 = xx 21 xx 31 xx 23 xx 33
+ "vtrn.u32 d2, d3 \n"
+
+ // d4 = xx 20 xx 21 xx 22 xx 23
+ // d5 = xx 30 xx 31 xx 32 xx 33
+ "vtrn.u16 d2, d3 \n"
+
+ // 0+1+2, 3+4+5
+ "vadd.u16 q0, q1 \n"
+
+ // Need to divide, but can't downshift as the the value
+ // isn't a power of 2. So multiply by 65536 / n
+ // and take the upper 16 bits.
+ "vqrdmulh.s16 q0, q0, q15 \n"
+
+ // Align for table lookup, vtbl requires registers to
+ // be adjacent
+ "vmov.u8 d2, d4 \n"
+
+ "vtbl.u8 d3, {d0, d1, d2}, d28 \n"
+ "vtbl.u8 d4, {d0, d1, d2}, d29 \n"
+
+ "vst1.8 {d3}, [%1]! \n"
+ "vst1.32 {d4[0]}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_stride), // %3
+ "+r"(src_ptr1) // %4
+ : "r"(&kMult38_Div6), // %5
+ "r"(&kShuf38_2), // %6
+ "r"(&kMult38_Div9) // %7
+ : "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory",
+ "cc");
+}
+
+// 32x2 -> 12x1
+void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "vld1.16 {q13}, [%4] \n"
+ "vld1.8 {q14}, [%5] \n"
+ "add %3, %0 \n"
+ "1: \n"
+
+ // d0 = 00 40 01 41 02 42 03 43
+ // d1 = 10 50 11 51 12 52 13 53
+ // d2 = 20 60 21 61 22 62 23 63
+ // d3 = 30 70 31 71 32 72 33 73
+ "vld4.8 {d0, d1, d2, d3}, [%0]! \n"
+ "vld4.8 {d4, d5, d6, d7}, [%3]! \n"
+ "subs %2, %2, #12 \n"
+
+ // Shuffle the input data around to get align the data
+ // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+ // d0 = 00 10 01 11 02 12 03 13
+ // d1 = 40 50 41 51 42 52 43 53
+ "vtrn.u8 d0, d1 \n"
+ "vtrn.u8 d4, d5 \n"
+
+ // d2 = 20 30 21 31 22 32 23 33
+ // d3 = 60 70 61 71 62 72 63 73
+ "vtrn.u8 d2, d3 \n"
+ "vtrn.u8 d6, d7 \n"
+
+ // d0 = 00+10 01+11 02+12 03+13
+ // d2 = 40+50 41+51 42+52 43+53
+ "vpaddl.u8 q0, q0 \n"
+ "vpaddl.u8 q2, q2 \n"
+
+ // d3 = 60+70 61+71 62+72 63+73
+ "vpaddl.u8 d3, d3 \n"
+ "vpaddl.u8 d7, d7 \n"
+
+ // combine source lines
+ "vadd.u16 q0, q2 \n"
+ "vadd.u16 d4, d3, d7 \n"
+
+ // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
+ "vqrshrn.u16 d4, q2, #2 \n"
+
+ // Shuffle 2,3 reg around so that 2 can be added to the
+ // 0,1 reg and 3 can be added to the 4,5 reg. This
+ // requires expanding from u8 to u16 as the 0,1 and 4,5
+ // registers are already expanded. Then do transposes
+ // to get aligned.
+ // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+ "vmovl.u8 q1, d2 \n"
+ "vmovl.u8 q3, d6 \n"
+
+ // combine source lines
+ "vadd.u16 q1, q3 \n"
+
+ // d4 = xx 20 xx 30 xx 22 xx 32
+ // d5 = xx 21 xx 31 xx 23 xx 33
+ "vtrn.u32 d2, d3 \n"
+
+ // d4 = xx 20 xx 21 xx 22 xx 23
+ // d5 = xx 30 xx 31 xx 32 xx 33
+ "vtrn.u16 d2, d3 \n"
+
+ // 0+1+2, 3+4+5
+ "vadd.u16 q0, q1 \n"
+
+ // Need to divide, but can't downshift as the the value
+ // isn't a power of 2. So multiply by 65536 / n
+ // and take the upper 16 bits.
+ "vqrdmulh.s16 q0, q0, q13 \n"
+
+ // Align for table lookup, vtbl requires registers to
+ // be adjacent
+ "vmov.u8 d2, d4 \n"
+
+ "vtbl.u8 d3, {d0, d1, d2}, d28 \n"
+ "vtbl.u8 d4, {d0, d1, d2}, d29 \n"
+
+ "vst1.8 {d3}, [%1]! \n"
+ "vst1.32 {d4[0]}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_stride) // %3
+ : "r"(&kMult38_Div6), // %4
+ "r"(&kShuf38_2) // %5
+ : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc");
+}
+
+void ScaleAddRows_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ int src_width,
+ int src_height) {
+ const uint8_t* src_tmp;
+ asm volatile(
+ "1: \n"
+ "mov %0, %1 \n"
+ "mov r12, %5 \n"
+ "veor q2, q2, q2 \n"
+ "veor q3, q3, q3 \n"
+ "2: \n"
+ // load 16 pixels into q0
+ "vld1.8 {q0}, [%0], %3 \n"
+ "vaddw.u8 q3, q3, d1 \n"
+ "vaddw.u8 q2, q2, d0 \n"
+ "subs r12, r12, #1 \n"
+ "bgt 2b \n"
+ "vst1.16 {q2, q3}, [%2]! \n" // store pixels
+ "add %1, %1, #16 \n"
+ "subs %4, %4, #16 \n" // 16 processed per loop
+ "bgt 1b \n"
+ : "=&r"(src_tmp), // %0
+ "+r"(src_ptr), // %1
+ "+r"(dst_ptr), // %2
+ "+r"(src_stride), // %3
+ "+r"(src_width), // %4
+ "+r"(src_height) // %5
+ :
+ : "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD2_DATA8_LANE(n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5 \n" \
+ "add %3, %3, %4 \n" \
+ "vld2.8 {d6[" #n "], d7[" #n "]}, [%6] \n"
+
+// The NEON version mimics this formula (from row_common.cc):
+// #define BLENDER(a, b, f) (uint8_t)((int)(a) +
+// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
+
+void ScaleFilterCols_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ int dx_offset[4] = {0, 1, 2, 3};
+ int* tmp = dx_offset;
+ const uint8_t* src_tmp = src_ptr;
+ asm volatile (
+ "vdup.32 q0, %3 \n" // x
+ "vdup.32 q1, %4 \n" // dx
+ "vld1.32 {q2}, [%5] \n" // 0 1 2 3
+ "vshl.i32 q3, q1, #2 \n" // 4 * dx
+ "vmul.s32 q1, q1, q2 \n"
+ // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
+ "vadd.s32 q1, q1, q0 \n"
+ // x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx
+ "vadd.s32 q2, q1, q3 \n"
+ "vshl.i32 q0, q3, #1 \n" // 8 * dx
+ "1: \n"
+ LOAD2_DATA8_LANE(0)
+ LOAD2_DATA8_LANE(1)
+ LOAD2_DATA8_LANE(2)
+ LOAD2_DATA8_LANE(3)
+ LOAD2_DATA8_LANE(4)
+ LOAD2_DATA8_LANE(5)
+ LOAD2_DATA8_LANE(6)
+ LOAD2_DATA8_LANE(7)
+ "vmov q10, q1 \n"
+ "vmov q11, q2 \n"
+ "vuzp.16 q10, q11 \n"
+ "vmovl.u8 q8, d6 \n"
+ "vmovl.u8 q9, d7 \n"
+ "vsubl.s16 q11, d18, d16 \n"
+ "vsubl.s16 q12, d19, d17 \n"
+ "vmovl.u16 q13, d20 \n"
+ "vmovl.u16 q10, d21 \n"
+ "vmul.s32 q11, q11, q13 \n"
+ "vmul.s32 q12, q12, q10 \n"
+ "vrshrn.s32 d18, q11, #16 \n"
+ "vrshrn.s32 d19, q12, #16 \n"
+ "vadd.s16 q8, q8, q9 \n"
+ "vmovn.s16 d6, q8 \n"
+
+ "vst1.8 {d6}, [%0]! \n" // store pixels
+ "vadd.s32 q1, q1, q0 \n"
+ "vadd.s32 q2, q2, q0 \n"
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ "bgt 1b \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(x), // %3
+ "+r"(dx), // %4
+ "+r"(tmp), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3",
+ "q8", "q9", "q10", "q11", "q12", "q13"
+ );
+}
+
+#undef LOAD2_DATA8_LANE
+
+// 16x2 -> 16x1
+void ScaleFilterRows_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ asm volatile(
+ "cmp %4, #0 \n"
+ "beq 100f \n"
+ "add %2, %1 \n"
+ "cmp %4, #64 \n"
+ "beq 75f \n"
+ "cmp %4, #128 \n"
+ "beq 50f \n"
+ "cmp %4, #192 \n"
+ "beq 25f \n"
+
+ "vdup.8 d5, %4 \n"
+ "rsb %4, #256 \n"
+ "vdup.8 d4, %4 \n"
+ // General purpose row blend.
+ "1: \n"
+ "vld1.8 {q0}, [%1]! \n"
+ "vld1.8 {q1}, [%2]! \n"
+ "subs %3, %3, #16 \n"
+ "vmull.u8 q13, d0, d4 \n"
+ "vmull.u8 q14, d1, d4 \n"
+ "vmlal.u8 q13, d2, d5 \n"
+ "vmlal.u8 q14, d3, d5 \n"
+ "vrshrn.u16 d0, q13, #8 \n"
+ "vrshrn.u16 d1, q14, #8 \n"
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 1b \n"
+ "b 99f \n"
+
+ // Blend 25 / 75.
+ "25: \n"
+ "vld1.8 {q0}, [%1]! \n"
+ "vld1.8 {q1}, [%2]! \n"
+ "subs %3, %3, #16 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 25b \n"
+ "b 99f \n"
+
+ // Blend 50 / 50.
+ "50: \n"
+ "vld1.8 {q0}, [%1]! \n"
+ "vld1.8 {q1}, [%2]! \n"
+ "subs %3, %3, #16 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 50b \n"
+ "b 99f \n"
+
+ // Blend 75 / 25.
+ "75: \n"
+ "vld1.8 {q1}, [%1]! \n"
+ "vld1.8 {q0}, [%2]! \n"
+ "subs %3, %3, #16 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 75b \n"
+ "b 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ "100: \n"
+ "vld1.8 {q0}, [%1]! \n"
+ "subs %3, %3, #16 \n"
+ "vst1.8 {q0}, [%0]! \n"
+ "bgt 100b \n"
+
+ "99: \n"
+ "vst1.8 {d1[7]}, [%0] \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(src_stride), // %2
+ "+r"(dst_width), // %3
+ "+r"(source_y_fraction) // %4
+ :
+ : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc");
+}
+
+void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "vld4.32 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
+ "vld4.32 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ "vmov q2, q1 \n" // load next 8 ARGB
+ "vst2.32 {q2, q3}, [%1]! \n" // store odd pixels
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+
+// 46: f964 018d vld4.32 {d16,d18,d20,d22}, [r4]!
+// 4a: 3e04 subs r6, #4
+// 4c: f964 118d vld4.32 {d17,d19,d21,d23}, [r4]!
+// 50: ef64 21f4 vorr q9, q10, q10
+// 54: f942 038d vst2.32 {d16-d19}, [r2]!
+// 58: d1f5 bne.n 46 <ScaleARGBRowDown2_C+0x46>
+
+void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "vld4.32 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
+ "vld4.32 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ "vrhadd.u8 q0, q0, q1 \n" // rounding half add
+ "vrhadd.u8 q1, q2, q3 \n" // rounding half add
+ "vst2.32 {q0, q1}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
+ );
+}
+
+void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ asm volatile(
+ // change the stride to row 2 pointer
+ "add %1, %1, %0 \n"
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB
+ "subs %3, %3, #8 \n" // 8 processed per loop.
+ "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
+ "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
+ "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB
+ "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB
+ "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts.
+ "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts.
+ "vrshrn.u16 d0, q0, #2 \n" // round and pack to bytes
+ "vrshrn.u16 d1, q1, #2 \n"
+ "vrshrn.u16 d2, q2, #2 \n"
+ "vrshrn.u16 d3, q3, #2 \n"
+ "vst4.8 {d0, d1, d2, d3}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst), // %2
+ "+r"(dst_width) // %3
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
+}
+
+// Reads 4 pixels at a time.
+// Alignment requirement: src_argb 4 byte aligned.
+void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "mov r12, %3, lsl #2 \n"
+ "1: \n"
+ "vld1.32 {d0[0]}, [%0], r12 \n"
+ "vld1.32 {d0[1]}, [%0], r12 \n"
+ "vld1.32 {d1[0]}, [%0], r12 \n"
+ "vld1.32 {d1[1]}, [%0], r12 \n"
+ "subs %2, %2, #4 \n" // 4 pixels per loop.
+ "vst1.8 {q0}, [%1]! \n"
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ : "r"(src_stepx) // %3
+ : "memory", "cc", "r12", "q0");
+}
+
+// Reads 4 pixels at a time.
+// Alignment requirement: src_argb 4 byte aligned.
+void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ asm volatile(
+ "mov r12, %4, lsl #2 \n"
+ "add %1, %1, %0 \n"
+ "1: \n"
+ "vld1.8 {d0}, [%0], r12 \n" // 4 2x2 blocks -> 2x1
+ "vld1.8 {d1}, [%1], r12 \n"
+ "vld1.8 {d2}, [%0], r12 \n"
+ "vld1.8 {d3}, [%1], r12 \n"
+ "vld1.8 {d4}, [%0], r12 \n"
+ "vld1.8 {d5}, [%1], r12 \n"
+ "vld1.8 {d6}, [%0], r12 \n"
+ "vld1.8 {d7}, [%1], r12 \n"
+ "vaddl.u8 q0, d0, d1 \n"
+ "vaddl.u8 q1, d2, d3 \n"
+ "vaddl.u8 q2, d4, d5 \n"
+ "vaddl.u8 q3, d6, d7 \n"
+ "vswp.8 d1, d2 \n" // ab_cd -> ac_bd
+ "vswp.8 d5, d6 \n" // ef_gh -> eg_fh
+ "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d)
+ "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h)
+ "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels.
+ "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels.
+ "subs %3, %3, #4 \n" // 4 pixels per loop.
+ "vst1.8 {q0}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst_argb), // %2
+ "+r"(dst_width) // %3
+ : "r"(src_stepx) // %4
+ : "memory", "cc", "r12", "q0", "q1", "q2", "q3");
+}
+
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD1_DATA32_LANE(dn, n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5, lsl #2 \n" \
+ "add %3, %3, %4 \n" \
+ "vld1.32 {" #dn "[" #n "]}, [%6] \n"
+
+void ScaleARGBCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ int tmp;
+ const uint8_t* src_tmp = src_argb;
+ asm volatile(
+ "1: \n"
+ // clang-format off
+ LOAD1_DATA32_LANE(d0, 0)
+ LOAD1_DATA32_LANE(d0, 1)
+ LOAD1_DATA32_LANE(d1, 0)
+ LOAD1_DATA32_LANE(d1, 1)
+ LOAD1_DATA32_LANE(d2, 0)
+ LOAD1_DATA32_LANE(d2, 1)
+ LOAD1_DATA32_LANE(d3, 0)
+ LOAD1_DATA32_LANE(d3, 1)
+ // clang-format on
+ "vst1.32 {q0, q1}, [%0]! \n" // store pixels
+ "subs %2, %2, #8 \n" // 8 processed per loop
+ "bgt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+r"(dst_width), // %2
+ "+r"(x), // %3
+ "+r"(dx), // %4
+ "=&r"(tmp), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "q0", "q1");
+}
+
+#undef LOAD1_DATA32_LANE
+
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD2_DATA32_LANE(dn1, dn2, n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5, lsl #2 \n" \
+ "add %3, %3, %4 \n" \
+ "vld2.32 {" #dn1 "[" #n "], " #dn2 "[" #n "]}, [%6] \n"
+
+void ScaleARGBFilterCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ int dx_offset[4] = {0, 1, 2, 3};
+ int* tmp = dx_offset;
+ const uint8_t* src_tmp = src_argb;
+ asm volatile (
+ "vdup.32 q0, %3 \n" // x
+ "vdup.32 q1, %4 \n" // dx
+ "vld1.32 {q2}, [%5] \n" // 0 1 2 3
+ "vshl.i32 q9, q1, #2 \n" // 4 * dx
+ "vmul.s32 q1, q1, q2 \n"
+ "vmov.i8 q3, #0x7f \n" // 0x7F
+ "vmov.i16 q15, #0x7f \n" // 0x7F
+ // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
+ "vadd.s32 q8, q1, q0 \n"
+ "1: \n"
+ // d0, d1: a
+ // d2, d3: b
+ LOAD2_DATA32_LANE(d0, d2, 0)
+ LOAD2_DATA32_LANE(d0, d2, 1)
+ LOAD2_DATA32_LANE(d1, d3, 0)
+ LOAD2_DATA32_LANE(d1, d3, 1)
+ "vshrn.i32 d22, q8, #9 \n"
+ "vand.16 d22, d22, d30 \n"
+ "vdup.8 d24, d22[0] \n"
+ "vdup.8 d25, d22[2] \n"
+ "vdup.8 d26, d22[4] \n"
+ "vdup.8 d27, d22[6] \n"
+ "vext.8 d4, d24, d25, #4 \n"
+ "vext.8 d5, d26, d27, #4 \n" // f
+ "veor.8 q10, q2, q3 \n" // 0x7f ^ f
+ "vmull.u8 q11, d0, d20 \n"
+ "vmull.u8 q12, d1, d21 \n"
+ "vmull.u8 q13, d2, d4 \n"
+ "vmull.u8 q14, d3, d5 \n"
+ "vadd.i16 q11, q11, q13 \n"
+ "vadd.i16 q12, q12, q14 \n"
+ "vshrn.i16 d0, q11, #7 \n"
+ "vshrn.i16 d1, q12, #7 \n"
+
+ "vst1.32 {d0, d1}, [%0]! \n" // store pixels
+ "vadd.s32 q8, q8, q9 \n"
+ "subs %2, %2, #4 \n" // 4 processed per loop
+ "bgt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+r"(dst_width), // %2
+ "+r"(x), // %3
+ "+r"(dx), // %4
+ "+r"(tmp), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9",
+ "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
+#undef LOAD2_DATA32_LANE
+
+#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_neon64.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_neon64.cc
new file mode 100644
index 0000000000..494a9cfbfb
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_neon64.cc
@@ -0,0 +1,1064 @@
+/*
+ * Copyright 2014 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+#include "libyuv/scale.h"
+#include "libyuv/scale_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC Neon armv8 64 bit.
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+// Read 32x1 throw away even pixels, and write 16x1.
+void ScaleRowDown2_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ // load even pixels into v0, odd into v1
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n"
+ "subs %w2, %w2, #16 \n" // 16 processed per loop
+ "st1 {v1.16b}, [%1], #16 \n" // store odd pixels
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "v0", "v1" // Clobber List
+ );
+}
+
+// Read 32x1 average down and write 16x1.
+void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ // load even pixels into v0, odd into v1
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n"
+ "subs %w2, %w2, #16 \n" // 16 processed per loop
+ "urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add
+ "st1 {v0.16b}, [%1], #16 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "v0", "v1" // Clobber List
+ );
+}
+
+// Read 32x2 average down and write 16x1.
+void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ asm volatile(
+ // change the stride to row 2 pointer
+ "add %1, %1, %0 \n"
+ "1: \n"
+ "ld1 {v0.16b, v1.16b}, [%0], #32 \n" // load row 1 and post inc
+ "ld1 {v2.16b, v3.16b}, [%1], #32 \n" // load row 2 and post inc
+ "subs %w3, %w3, #16 \n" // 16 processed per loop
+ "uaddlp v0.8h, v0.16b \n" // row 1 add adjacent
+ "uaddlp v1.8h, v1.16b \n"
+ "uadalp v0.8h, v2.16b \n" // += row 2 add adjacent
+ "uadalp v1.8h, v3.16b \n"
+ "rshrn v0.8b, v0.8h, #2 \n" // round and pack
+ "rshrn2 v0.16b, v1.8h, #2 \n"
+ "st1 {v0.16b}, [%2], #16 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst), // %2
+ "+r"(dst_width) // %3
+ :
+ : "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+void ScaleRowDown4_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+ "st1 {v2.8b}, [%1], #8 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ :
+ : "v0", "v1", "v2", "v3", "memory", "cc");
+}
+
+void ScaleRowDown4Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
+ const uint8_t* src_ptr2 = src_ptr + src_stride * 2;
+ const uint8_t* src_ptr3 = src_ptr + src_stride * 3;
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.16b}, [%0], #16 \n" // load up 16x4
+ "ld1 {v1.16b}, [%2], #16 \n"
+ "ld1 {v2.16b}, [%3], #16 \n"
+ "ld1 {v3.16b}, [%4], #16 \n"
+ "subs %w5, %w5, #4 \n"
+ "uaddlp v0.8h, v0.16b \n"
+ "uadalp v0.8h, v1.16b \n"
+ "uadalp v0.8h, v2.16b \n"
+ "uadalp v0.8h, v3.16b \n"
+ "addp v0.8h, v0.8h, v0.8h \n"
+ "rshrn v0.8b, v0.8h, #4 \n" // divide by 16 w/rounding
+ "st1 {v0.s}[0], [%1], #4 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(src_ptr1), // %2
+ "+r"(src_ptr2), // %3
+ "+r"(src_ptr3), // %4
+ "+r"(dst_width) // %5
+ :
+ : "v0", "v1", "v2", "v3", "memory", "cc");
+}
+
+// Down scale from 4 to 3 pixels. Use the neon multilane read/write
+// to load up the every 4th pixel into a 4 different registers.
+// Point samples 32 pixels to 24 pixels.
+void ScaleRowDown34_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
+ "subs %w2, %w2, #24 \n"
+ "orr v2.16b, v3.16b, v3.16b \n" // order v0,v1,v2
+ "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ :
+ : "v0", "v1", "v2", "v3", "memory", "cc");
+}
+
+void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "movi v20.8b, #3 \n"
+ "add %3, %3, %0 \n"
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1
+ "subs %w2, %w2, #24 \n"
+
+ // filter src line 0 with src line 1
+ // expand chars to shorts to allow for room
+ // when adding lines together
+ "ushll v16.8h, v4.8b, #0 \n"
+ "ushll v17.8h, v5.8b, #0 \n"
+ "ushll v18.8h, v6.8b, #0 \n"
+ "ushll v19.8h, v7.8b, #0 \n"
+
+ // 3 * line_0 + line_1
+ "umlal v16.8h, v0.8b, v20.8b \n"
+ "umlal v17.8h, v1.8b, v20.8b \n"
+ "umlal v18.8h, v2.8b, v20.8b \n"
+ "umlal v19.8h, v3.8b, v20.8b \n"
+
+ // (3 * line_0 + line_1) >> 2
+ "uqrshrn v0.8b, v16.8h, #2 \n"
+ "uqrshrn v1.8b, v17.8h, #2 \n"
+ "uqrshrn v2.8b, v18.8h, #2 \n"
+ "uqrshrn v3.8b, v19.8h, #2 \n"
+
+ // a0 = (src[0] * 3 + s[1] * 1) >> 2
+ "ushll v16.8h, v1.8b, #0 \n"
+ "umlal v16.8h, v0.8b, v20.8b \n"
+ "uqrshrn v0.8b, v16.8h, #2 \n"
+
+ // a1 = (src[1] * 1 + s[2] * 1) >> 1
+ "urhadd v1.8b, v1.8b, v2.8b \n"
+
+ // a2 = (src[2] * 1 + s[3] * 3) >> 2
+ "ushll v16.8h, v2.8b, #0 \n"
+ "umlal v16.8h, v3.8b, v20.8b \n"
+ "uqrshrn v2.8b, v16.8h, #2 \n"
+
+ "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
+
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_stride) // %3
+ :
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18",
+ "v19", "v20", "memory", "cc");
+}
+
+void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ asm volatile(
+ "movi v20.8b, #3 \n"
+ "add %3, %3, %0 \n"
+ "1: \n"
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1
+ "subs %w2, %w2, #24 \n"
+ // average src line 0 with src line 1
+ "urhadd v0.8b, v0.8b, v4.8b \n"
+ "urhadd v1.8b, v1.8b, v5.8b \n"
+ "urhadd v2.8b, v2.8b, v6.8b \n"
+ "urhadd v3.8b, v3.8b, v7.8b \n"
+
+ // a0 = (src[0] * 3 + s[1] * 1) >> 2
+ "ushll v4.8h, v1.8b, #0 \n"
+ "umlal v4.8h, v0.8b, v20.8b \n"
+ "uqrshrn v0.8b, v4.8h, #2 \n"
+
+ // a1 = (src[1] * 1 + s[2] * 1) >> 1
+ "urhadd v1.8b, v1.8b, v2.8b \n"
+
+ // a2 = (src[2] * 1 + s[3] * 3) >> 2
+ "ushll v4.8h, v2.8b, #0 \n"
+ "umlal v4.8h, v3.8b, v20.8b \n"
+ "uqrshrn v2.8b, v4.8h, #2 \n"
+
+ "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(src_stride) // %3
+ :
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc");
+}
+
+static const uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19,
+ 22, 24, 27, 30, 0, 0, 0, 0};
+static const uvec8 kShuf38_2 = {0, 16, 32, 2, 18, 33, 4, 20,
+ 34, 6, 22, 35, 0, 0, 0, 0};
+static const vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12,
+ 65536 / 12, 65536 / 12, 65536 / 12,
+ 65536 / 12, 65536 / 12};
+static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18,
+ 65536 / 18, 65536 / 18, 65536 / 18,
+ 65536 / 18, 65536 / 18};
+
+// 32 -> 12
+void ScaleRowDown38_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "ld1 {v3.16b}, [%3] \n"
+ "1: \n"
+ "ld1 {v0.16b,v1.16b}, [%0], #32 \n"
+ "subs %w2, %w2, #12 \n"
+ "tbl v2.16b, {v0.16b,v1.16b}, v3.16b \n"
+ "st1 {v2.8b}, [%1], #8 \n"
+ "st1 {v2.s}[2], [%1], #4 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(dst_width) // %2
+ : "r"(&kShuf38) // %3
+ : "v0", "v1", "v2", "v3", "memory", "cc");
+}
+
+// 32x3 -> 12x1
+void OMITFP ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ const uint8_t* src_ptr1 = src_ptr + src_stride * 2;
+ ptrdiff_t tmp_src_stride = src_stride;
+
+ asm volatile(
+ "ld1 {v29.8h}, [%5] \n"
+ "ld1 {v30.16b}, [%6] \n"
+ "ld1 {v31.8h}, [%7] \n"
+ "add %2, %2, %0 \n"
+ "1: \n"
+
+ // 00 40 01 41 02 42 03 43
+ // 10 50 11 51 12 52 13 53
+ // 20 60 21 61 22 62 23 63
+ // 30 70 31 71 32 72 33 73
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
+ "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%3], #32 \n"
+ "subs %w4, %w4, #12 \n"
+
+ // Shuffle the input data around to get align the data
+ // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+ // 00 10 01 11 02 12 03 13
+ // 40 50 41 51 42 52 43 53
+ "trn1 v20.8b, v0.8b, v1.8b \n"
+ "trn2 v21.8b, v0.8b, v1.8b \n"
+ "trn1 v22.8b, v4.8b, v5.8b \n"
+ "trn2 v23.8b, v4.8b, v5.8b \n"
+ "trn1 v24.8b, v16.8b, v17.8b \n"
+ "trn2 v25.8b, v16.8b, v17.8b \n"
+
+ // 20 30 21 31 22 32 23 33
+ // 60 70 61 71 62 72 63 73
+ "trn1 v0.8b, v2.8b, v3.8b \n"
+ "trn2 v1.8b, v2.8b, v3.8b \n"
+ "trn1 v4.8b, v6.8b, v7.8b \n"
+ "trn2 v5.8b, v6.8b, v7.8b \n"
+ "trn1 v16.8b, v18.8b, v19.8b \n"
+ "trn2 v17.8b, v18.8b, v19.8b \n"
+
+ // 00+10 01+11 02+12 03+13
+ // 40+50 41+51 42+52 43+53
+ "uaddlp v20.4h, v20.8b \n"
+ "uaddlp v21.4h, v21.8b \n"
+ "uaddlp v22.4h, v22.8b \n"
+ "uaddlp v23.4h, v23.8b \n"
+ "uaddlp v24.4h, v24.8b \n"
+ "uaddlp v25.4h, v25.8b \n"
+
+ // 60+70 61+71 62+72 63+73
+ "uaddlp v1.4h, v1.8b \n"
+ "uaddlp v5.4h, v5.8b \n"
+ "uaddlp v17.4h, v17.8b \n"
+
+ // combine source lines
+ "add v20.4h, v20.4h, v22.4h \n"
+ "add v21.4h, v21.4h, v23.4h \n"
+ "add v20.4h, v20.4h, v24.4h \n"
+ "add v21.4h, v21.4h, v25.4h \n"
+ "add v2.4h, v1.4h, v5.4h \n"
+ "add v2.4h, v2.4h, v17.4h \n"
+
+ // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
+ // + s[6 + st * 1] + s[7 + st * 1]
+ // + s[6 + st * 2] + s[7 + st * 2]) / 6
+ "sqrdmulh v2.8h, v2.8h, v29.8h \n"
+ "xtn v2.8b, v2.8h \n"
+
+ // Shuffle 2,3 reg around so that 2 can be added to the
+ // 0,1 reg and 3 can be added to the 4,5 reg. This
+ // requires expanding from u8 to u16 as the 0,1 and 4,5
+ // registers are already expanded. Then do transposes
+ // to get aligned.
+ // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+ "ushll v16.8h, v16.8b, #0 \n"
+ "uaddl v0.8h, v0.8b, v4.8b \n"
+
+ // combine source lines
+ "add v0.8h, v0.8h, v16.8h \n"
+
+ // xx 20 xx 21 xx 22 xx 23
+ // xx 30 xx 31 xx 32 xx 33
+ "trn1 v1.8h, v0.8h, v0.8h \n"
+ "trn2 v4.8h, v0.8h, v0.8h \n"
+ "xtn v0.4h, v1.4s \n"
+ "xtn v4.4h, v4.4s \n"
+
+ // 0+1+2, 3+4+5
+ "add v20.8h, v20.8h, v0.8h \n"
+ "add v21.8h, v21.8h, v4.8h \n"
+
+ // Need to divide, but can't downshift as the the value
+ // isn't a power of 2. So multiply by 65536 / n
+ // and take the upper 16 bits.
+ "sqrdmulh v0.8h, v20.8h, v31.8h \n"
+ "sqrdmulh v1.8h, v21.8h, v31.8h \n"
+
+ // Align for table lookup, vtbl requires registers to be adjacent
+ "tbl v3.16b, {v0.16b, v1.16b, v2.16b}, v30.16b \n"
+
+ "st1 {v3.8b}, [%1], #8 \n"
+ "st1 {v3.s}[2], [%1], #4 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(tmp_src_stride), // %2
+ "+r"(src_ptr1), // %3
+ "+r"(dst_width) // %4
+ : "r"(&kMult38_Div6), // %5
+ "r"(&kShuf38_2), // %6
+ "r"(&kMult38_Div9) // %7
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18",
+ "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v29", "v30", "v31",
+ "memory", "cc");
+}
+
+// 32x2 -> 12x1
+void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ // TODO(fbarchard): use src_stride directly for clang 3.5+.
+ ptrdiff_t tmp_src_stride = src_stride;
+ asm volatile(
+ "ld1 {v30.8h}, [%4] \n"
+ "ld1 {v31.16b}, [%5] \n"
+ "add %2, %2, %0 \n"
+ "1: \n"
+
+ // 00 40 01 41 02 42 03 43
+ // 10 50 11 51 12 52 13 53
+ // 20 60 21 61 22 62 23 63
+ // 30 70 31 71 32 72 33 73
+ "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
+ "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
+ "subs %w3, %w3, #12 \n"
+
+ // Shuffle the input data around to get align the data
+ // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+ // 00 10 01 11 02 12 03 13
+ // 40 50 41 51 42 52 43 53
+ "trn1 v16.8b, v0.8b, v1.8b \n"
+ "trn2 v17.8b, v0.8b, v1.8b \n"
+ "trn1 v18.8b, v4.8b, v5.8b \n"
+ "trn2 v19.8b, v4.8b, v5.8b \n"
+
+ // 20 30 21 31 22 32 23 33
+ // 60 70 61 71 62 72 63 73
+ "trn1 v0.8b, v2.8b, v3.8b \n"
+ "trn2 v1.8b, v2.8b, v3.8b \n"
+ "trn1 v4.8b, v6.8b, v7.8b \n"
+ "trn2 v5.8b, v6.8b, v7.8b \n"
+
+ // 00+10 01+11 02+12 03+13
+ // 40+50 41+51 42+52 43+53
+ "uaddlp v16.4h, v16.8b \n"
+ "uaddlp v17.4h, v17.8b \n"
+ "uaddlp v18.4h, v18.8b \n"
+ "uaddlp v19.4h, v19.8b \n"
+
+ // 60+70 61+71 62+72 63+73
+ "uaddlp v1.4h, v1.8b \n"
+ "uaddlp v5.4h, v5.8b \n"
+
+ // combine source lines
+ "add v16.4h, v16.4h, v18.4h \n"
+ "add v17.4h, v17.4h, v19.4h \n"
+ "add v2.4h, v1.4h, v5.4h \n"
+
+ // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
+ "uqrshrn v2.8b, v2.8h, #2 \n"
+
+ // Shuffle 2,3 reg around so that 2 can be added to the
+ // 0,1 reg and 3 can be added to the 4,5 reg. This
+ // requires expanding from u8 to u16 as the 0,1 and 4,5
+ // registers are already expanded. Then do transposes
+ // to get aligned.
+ // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+
+ // combine source lines
+ "uaddl v0.8h, v0.8b, v4.8b \n"
+
+ // xx 20 xx 21 xx 22 xx 23
+ // xx 30 xx 31 xx 32 xx 33
+ "trn1 v1.8h, v0.8h, v0.8h \n"
+ "trn2 v4.8h, v0.8h, v0.8h \n"
+ "xtn v0.4h, v1.4s \n"
+ "xtn v4.4h, v4.4s \n"
+
+ // 0+1+2, 3+4+5
+ "add v16.8h, v16.8h, v0.8h \n"
+ "add v17.8h, v17.8h, v4.8h \n"
+
+ // Need to divide, but can't downshift as the the value
+ // isn't a power of 2. So multiply by 65536 / n
+ // and take the upper 16 bits.
+ "sqrdmulh v0.8h, v16.8h, v30.8h \n"
+ "sqrdmulh v1.8h, v17.8h, v30.8h \n"
+
+ // Align for table lookup, vtbl requires registers to
+ // be adjacent
+
+ "tbl v3.16b, {v0.16b, v1.16b, v2.16b}, v31.16b \n"
+
+ "st1 {v3.8b}, [%1], #8 \n"
+ "st1 {v3.s}[2], [%1], #4 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst_ptr), // %1
+ "+r"(tmp_src_stride), // %2
+ "+r"(dst_width) // %3
+ : "r"(&kMult38_Div6), // %4
+ "r"(&kShuf38_2) // %5
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18",
+ "v19", "v30", "v31", "memory", "cc");
+}
+
+void ScaleAddRows_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst_ptr,
+ int src_width,
+ int src_height) {
+ const uint8_t* src_tmp;
+ asm volatile(
+ "1: \n"
+ "mov %0, %1 \n"
+ "mov w12, %w5 \n"
+ "eor v2.16b, v2.16b, v2.16b \n"
+ "eor v3.16b, v3.16b, v3.16b \n"
+ "2: \n"
+ // load 16 pixels into q0
+ "ld1 {v0.16b}, [%0], %3 \n"
+ "uaddw2 v3.8h, v3.8h, v0.16b \n"
+ "uaddw v2.8h, v2.8h, v0.8b \n"
+ "subs w12, w12, #1 \n"
+ "b.gt 2b \n"
+ "st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels
+ "add %1, %1, #16 \n"
+ "subs %w4, %w4, #16 \n" // 16 processed per loop
+ "b.gt 1b \n"
+ : "=&r"(src_tmp), // %0
+ "+r"(src_ptr), // %1
+ "+r"(dst_ptr), // %2
+ "+r"(src_stride), // %3
+ "+r"(src_width), // %4
+ "+r"(src_height) // %5
+ :
+ : "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD2_DATA8_LANE(n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5 \n" \
+ "add %3, %3, %4 \n" \
+ "ld2 {v4.b, v5.b}[" #n "], [%6] \n"
+
+// The NEON version mimics this formula (from row_common.cc):
+// #define BLENDER(a, b, f) (uint8_t)((int)(a) +
+// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
+
+void ScaleFilterCols_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ int dx_offset[4] = {0, 1, 2, 3};
+ int* tmp = dx_offset;
+ const uint8_t* src_tmp = src_ptr;
+ int64_t x64 = (int64_t)x; // NOLINT
+ int64_t dx64 = (int64_t)dx; // NOLINT
+ asm volatile (
+ "dup v0.4s, %w3 \n" // x
+ "dup v1.4s, %w4 \n" // dx
+ "ld1 {v2.4s}, [%5] \n" // 0 1 2 3
+ "shl v3.4s, v1.4s, #2 \n" // 4 * dx
+ "mul v1.4s, v1.4s, v2.4s \n"
+ // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
+ "add v1.4s, v1.4s, v0.4s \n"
+ // x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx
+ "add v2.4s, v1.4s, v3.4s \n"
+ "shl v0.4s, v3.4s, #1 \n" // 8 * dx
+ "1: \n"
+ LOAD2_DATA8_LANE(0)
+ LOAD2_DATA8_LANE(1)
+ LOAD2_DATA8_LANE(2)
+ LOAD2_DATA8_LANE(3)
+ LOAD2_DATA8_LANE(4)
+ LOAD2_DATA8_LANE(5)
+ LOAD2_DATA8_LANE(6)
+ LOAD2_DATA8_LANE(7)
+ "mov v6.16b, v1.16b \n"
+ "mov v7.16b, v2.16b \n"
+ "uzp1 v6.8h, v6.8h, v7.8h \n"
+ "ushll v4.8h, v4.8b, #0 \n"
+ "ushll v5.8h, v5.8b, #0 \n"
+ "ssubl v16.4s, v5.4h, v4.4h \n"
+ "ssubl2 v17.4s, v5.8h, v4.8h \n"
+ "ushll v7.4s, v6.4h, #0 \n"
+ "ushll2 v6.4s, v6.8h, #0 \n"
+ "mul v16.4s, v16.4s, v7.4s \n"
+ "mul v17.4s, v17.4s, v6.4s \n"
+ "rshrn v6.4h, v16.4s, #16 \n"
+ "rshrn2 v6.8h, v17.4s, #16 \n"
+ "add v4.8h, v4.8h, v6.8h \n"
+ "xtn v4.8b, v4.8h \n"
+
+ "st1 {v4.8b}, [%0], #8 \n" // store pixels
+ "add v1.4s, v1.4s, v0.4s \n"
+ "add v2.4s, v2.4s, v0.4s \n"
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+ "b.gt 1b \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(dst_width), // %2
+ "+r"(x64), // %3
+ "+r"(dx64), // %4
+ "+r"(tmp), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "v0", "v1", "v2", "v3",
+ "v4", "v5", "v6", "v7", "v16", "v17"
+ );
+}
+
+#undef LOAD2_DATA8_LANE
+
+// 16x2 -> 16x1
+void ScaleFilterRows_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ int y_fraction = 256 - source_y_fraction;
+ asm volatile(
+ "cmp %w4, #0 \n"
+ "b.eq 100f \n"
+ "add %2, %2, %1 \n"
+ "cmp %w4, #64 \n"
+ "b.eq 75f \n"
+ "cmp %w4, #128 \n"
+ "b.eq 50f \n"
+ "cmp %w4, #192 \n"
+ "b.eq 25f \n"
+
+ "dup v5.8b, %w4 \n"
+ "dup v4.8b, %w5 \n"
+ // General purpose row blend.
+ "1: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "ld1 {v1.16b}, [%2], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "umull v6.8h, v0.8b, v4.8b \n"
+ "umull2 v7.8h, v0.16b, v4.16b \n"
+ "umlal v6.8h, v1.8b, v5.8b \n"
+ "umlal2 v7.8h, v1.16b, v5.16b \n"
+ "rshrn v0.8b, v6.8h, #8 \n"
+ "rshrn2 v0.16b, v7.8h, #8 \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 1b \n"
+ "b 99f \n"
+
+ // Blend 25 / 75.
+ "25: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "ld1 {v1.16b}, [%2], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 25b \n"
+ "b 99f \n"
+
+ // Blend 50 / 50.
+ "50: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "ld1 {v1.16b}, [%2], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 50b \n"
+ "b 99f \n"
+
+ // Blend 75 / 25.
+ "75: \n"
+ "ld1 {v1.16b}, [%1], #16 \n"
+ "ld1 {v0.16b}, [%2], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 75b \n"
+ "b 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ "100: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 100b \n"
+
+ "99: \n"
+ "st1 {v0.b}[15], [%0] \n"
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(src_stride), // %2
+ "+r"(dst_width), // %3
+ "+r"(source_y_fraction), // %4
+ "+r"(y_fraction) // %5
+ :
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc");
+}
+
+void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ // load 16 ARGB pixels with even pixels into q0/q2, odd into q1/q3
+ "ld4 {v0.4s,v1.4s,v2.4s,v3.4s}, [%0], #64 \n"
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+ "mov v2.16b, v3.16b \n"
+ "st2 {v1.4s,v2.4s}, [%1], #32 \n" // store 8 odd pixels
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ // load 16 ARGB pixels with even pixels into q0/q2, odd into q1/q3
+ "ld4 {v0.4s,v1.4s,v2.4s,v3.4s}, [%0], #64 \n"
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+
+ "urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add
+ "urhadd v1.16b, v2.16b, v3.16b \n"
+ "st2 {v0.4s,v1.4s}, [%1], #32 \n" // store 8 pixels
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ asm volatile(
+ // change the stride to row 2 pointer
+ "add %1, %1, %0 \n"
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 8 ARGB
+ "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
+ "uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
+ "ld4 {v16.16b,v17.16b,v18.16b,v19.16b}, [%1], #64 \n" // load 8
+ "uadalp v0.8h, v16.16b \n" // B 16 bytes -> 8 shorts.
+ "uadalp v1.8h, v17.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v18.16b \n" // R 16 bytes -> 8 shorts.
+ "uadalp v3.8h, v19.16b \n" // A 16 bytes -> 8 shorts.
+ "rshrn v0.8b, v0.8h, #2 \n" // round and pack
+ "rshrn v1.8b, v1.8h, #2 \n"
+ "rshrn v2.8b, v2.8h, #2 \n"
+ "rshrn v3.8b, v3.8h, #2 \n"
+ "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst), // %2
+ "+r"(dst_width) // %3
+ :
+ : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
+}
+
+// Reads 4 pixels at a time.
+// Alignment requirement: src_argb 4 byte aligned.
+void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.s}[0], [%0], %3 \n"
+ "ld1 {v0.s}[1], [%0], %3 \n"
+ "ld1 {v0.s}[2], [%0], %3 \n"
+ "ld1 {v0.s}[3], [%0], %3 \n"
+ "subs %w2, %w2, #4 \n" // 4 pixels per loop.
+ "st1 {v0.16b}, [%1], #16 \n"
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(dst_width) // %2
+ : "r"((int64_t)(src_stepx * 4)) // %3
+ : "memory", "cc", "v0");
+}
+
+// Reads 4 pixels at a time.
+// Alignment requirement: src_argb 4 byte aligned.
+// TODO(Yang Zhang): Might be worth another optimization pass in future.
+// It could be upgraded to 8 pixels at a time to start with.
+void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ asm volatile(
+ "add %1, %1, %0 \n"
+ "1: \n"
+ "ld1 {v0.8b}, [%0], %4 \n" // Read 4 2x2 -> 2x1
+ "ld1 {v1.8b}, [%1], %4 \n"
+ "ld1 {v2.8b}, [%0], %4 \n"
+ "ld1 {v3.8b}, [%1], %4 \n"
+ "ld1 {v4.8b}, [%0], %4 \n"
+ "ld1 {v5.8b}, [%1], %4 \n"
+ "ld1 {v6.8b}, [%0], %4 \n"
+ "ld1 {v7.8b}, [%1], %4 \n"
+ "uaddl v0.8h, v0.8b, v1.8b \n"
+ "uaddl v2.8h, v2.8b, v3.8b \n"
+ "uaddl v4.8h, v4.8b, v5.8b \n"
+ "uaddl v6.8h, v6.8b, v7.8b \n"
+ "mov v16.d[1], v0.d[1] \n" // ab_cd -> ac_bd
+ "mov v0.d[1], v2.d[0] \n"
+ "mov v2.d[0], v16.d[1] \n"
+ "mov v16.d[1], v4.d[1] \n" // ef_gh -> eg_fh
+ "mov v4.d[1], v6.d[0] \n"
+ "mov v6.d[0], v16.d[1] \n"
+ "add v0.8h, v0.8h, v2.8h \n" // (a+b)_(c+d)
+ "add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h)
+ "rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels.
+ "rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels.
+ "subs %w3, %w3, #4 \n" // 4 pixels per loop.
+ "st1 {v0.16b}, [%2], #16 \n"
+ "b.gt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst_argb), // %2
+ "+r"(dst_width) // %3
+ : "r"((int64_t)(src_stepx * 4)) // %4
+ : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
+}
+
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD1_DATA32_LANE(vn, n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5, lsl #2 \n" \
+ "add %3, %3, %4 \n" \
+ "ld1 {" #vn ".s}[" #n "], [%6] \n"
+
+void ScaleARGBCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ const uint8_t* src_tmp = src_argb;
+ int64_t x64 = (int64_t)x; // NOLINT
+ int64_t dx64 = (int64_t)dx; // NOLINT
+ int64_t tmp64;
+ asm volatile(
+ "1: \n"
+ // clang-format off
+ LOAD1_DATA32_LANE(v0, 0)
+ LOAD1_DATA32_LANE(v0, 1)
+ LOAD1_DATA32_LANE(v0, 2)
+ LOAD1_DATA32_LANE(v0, 3)
+ LOAD1_DATA32_LANE(v1, 0)
+ LOAD1_DATA32_LANE(v1, 1)
+ LOAD1_DATA32_LANE(v1, 2)
+ LOAD1_DATA32_LANE(v1, 3)
+ // clang-format on
+ "st1 {v0.4s, v1.4s}, [%0], #32 \n" // store pixels
+ "subs %w2, %w2, #8 \n" // 8 processed per loop
+ "b.gt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+r"(dst_width), // %2
+ "+r"(x64), // %3
+ "+r"(dx64), // %4
+ "=&r"(tmp64), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "v0", "v1");
+}
+
+#undef LOAD1_DATA32_LANE
+
+// TODO(Yang Zhang): Investigate less load instructions for
+// the x/dx stepping
+#define LOAD2_DATA32_LANE(vn1, vn2, n) \
+ "lsr %5, %3, #16 \n" \
+ "add %6, %1, %5, lsl #2 \n" \
+ "add %3, %3, %4 \n" \
+ "ld2 {" #vn1 ".s, " #vn2 ".s}[" #n "], [%6] \n"
+
+void ScaleARGBFilterCols_NEON(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ int dx_offset[4] = {0, 1, 2, 3};
+ int* tmp = dx_offset;
+ const uint8_t* src_tmp = src_argb;
+ int64_t x64 = (int64_t)x; // NOLINT
+ int64_t dx64 = (int64_t)dx; // NOLINT
+ asm volatile (
+ "dup v0.4s, %w3 \n" // x
+ "dup v1.4s, %w4 \n" // dx
+ "ld1 {v2.4s}, [%5] \n" // 0 1 2 3
+ "shl v6.4s, v1.4s, #2 \n" // 4 * dx
+ "mul v1.4s, v1.4s, v2.4s \n"
+ "movi v3.16b, #0x7f \n" // 0x7F
+ "movi v4.8h, #0x7f \n" // 0x7F
+ // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
+ "add v5.4s, v1.4s, v0.4s \n"
+ "1: \n"
+ // d0, d1: a
+ // d2, d3: b
+ LOAD2_DATA32_LANE(v0, v1, 0)
+ LOAD2_DATA32_LANE(v0, v1, 1)
+ LOAD2_DATA32_LANE(v0, v1, 2)
+ LOAD2_DATA32_LANE(v0, v1, 3)
+ "shrn v2.4h, v5.4s, #9 \n"
+ "and v2.8b, v2.8b, v4.8b \n"
+ "dup v16.8b, v2.b[0] \n"
+ "dup v17.8b, v2.b[2] \n"
+ "dup v18.8b, v2.b[4] \n"
+ "dup v19.8b, v2.b[6] \n"
+ "ext v2.8b, v16.8b, v17.8b, #4 \n"
+ "ext v17.8b, v18.8b, v19.8b, #4 \n"
+ "ins v2.d[1], v17.d[0] \n" // f
+ "eor v7.16b, v2.16b, v3.16b \n" // 0x7f ^ f
+ "umull v16.8h, v0.8b, v7.8b \n"
+ "umull2 v17.8h, v0.16b, v7.16b \n"
+ "umull v18.8h, v1.8b, v2.8b \n"
+ "umull2 v19.8h, v1.16b, v2.16b \n"
+ "add v16.8h, v16.8h, v18.8h \n"
+ "add v17.8h, v17.8h, v19.8h \n"
+ "shrn v0.8b, v16.8h, #7 \n"
+ "shrn2 v0.16b, v17.8h, #7 \n"
+
+ "st1 {v0.4s}, [%0], #16 \n" // store pixels
+ "add v5.4s, v5.4s, v6.4s \n"
+ "subs %w2, %w2, #4 \n" // 4 processed per loop
+ "b.gt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(src_argb), // %1
+ "+r"(dst_width), // %2
+ "+r"(x64), // %3
+ "+r"(dx64), // %4
+ "+r"(tmp), // %5
+ "+r"(src_tmp) // %6
+ :
+ : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5",
+ "v6", "v7", "v16", "v17", "v18", "v19"
+ );
+}
+
+#undef LOAD2_DATA32_LANE
+
+// Read 16x2 average down and write 8x1.
+void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ asm volatile(
+ // change the stride to row 2 pointer
+ "add %1, %0, %1, lsl #1 \n" // ptr + stide * 2
+ "1: \n"
+ "ld1 {v0.8h, v1.8h}, [%0], #32 \n" // load row 1 and post inc
+ "ld1 {v2.8h, v3.8h}, [%1], #32 \n" // load row 2 and post inc
+ "subs %w3, %w3, #8 \n" // 8 processed per loop
+ "uaddlp v0.4s, v0.8h \n" // row 1 add adjacent
+ "uaddlp v1.4s, v1.8h \n"
+ "uadalp v0.4s, v2.8h \n" // +row 2 add adjacent
+ "uadalp v1.4s, v3.8h \n"
+ "rshrn v0.4h, v0.4s, #2 \n" // round and pack
+ "rshrn2 v0.8h, v1.4s, #2 \n"
+ "st1 {v0.8h}, [%2], #16 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst), // %2
+ "+r"(dst_width) // %3
+ :
+ : "v0", "v1", "v2", "v3" // Clobber List
+ );
+}
+
+// Read 8x2 upsample with filtering and write 16x1.
+// Actually reads an extra pixel, so 9x2.
+void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ asm volatile(
+ "add %1, %0, %1, lsl #1 \n" // ptr + stide * 2
+ "movi v0.8h, #9 \n" // constants
+ "movi v1.4s, #3 \n"
+
+ "1: \n"
+ "ld1 {v3.8h}, [%0], %4 \n" // TL read first 8
+ "ld1 {v4.8h}, [%0], %5 \n" // TR read 8 offset by 1
+ "ld1 {v5.8h}, [%1], %4 \n" // BL read 8 from next row
+ "ld1 {v6.8h}, [%1], %5 \n" // BR offset by 1
+ "subs %w3, %w3, #16 \n" // 16 dst pixels per loop
+ "umull v16.4s, v3.4h, v0.4h \n"
+ "umull2 v7.4s, v3.8h, v0.8h \n"
+ "umull v18.4s, v4.4h, v0.4h \n"
+ "umull2 v17.4s, v4.8h, v0.8h \n"
+ "uaddw v16.4s, v16.4s, v6.4h \n"
+ "uaddl2 v19.4s, v6.8h, v3.8h \n"
+ "uaddl v3.4s, v6.4h, v3.4h \n"
+ "uaddw2 v6.4s, v7.4s, v6.8h \n"
+ "uaddl2 v7.4s, v5.8h, v4.8h \n"
+ "uaddl v4.4s, v5.4h, v4.4h \n"
+ "uaddw v18.4s, v18.4s, v5.4h \n"
+ "mla v16.4s, v4.4s, v1.4s \n"
+ "mla v18.4s, v3.4s, v1.4s \n"
+ "mla v6.4s, v7.4s, v1.4s \n"
+ "uaddw2 v4.4s, v17.4s, v5.8h \n"
+ "uqrshrn v16.4h, v16.4s, #4 \n"
+ "mla v4.4s, v19.4s, v1.4s \n"
+ "uqrshrn2 v16.8h, v6.4s, #4 \n"
+ "uqrshrn v17.4h, v18.4s, #4 \n"
+ "uqrshrn2 v17.8h, v4.4s, #4 \n"
+ "st2 {v16.8h-v17.8h}, [%2], #32 \n"
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(src_stride), // %1
+ "+r"(dst), // %2
+ "+r"(dst_width) // %3
+ : "r"(2LL), // %4
+ "r"(14LL) // %5
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18",
+ "v19" // Clobber List
+ );
+}
+
+#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_win.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_win.cc
new file mode 100644
index 0000000000..c5fc86f3e9
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_win.cc
@@ -0,0 +1,1391 @@
+/*
+ * Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for 32 bit Visual C x86 and clangcl
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+
+// Offsets for source bytes 0 to 9
+static const uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9,
+ 128, 128, 128, 128, 128, 128, 128, 128};
+
+// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
+static const uvec8 kShuf1 = {3, 4, 5, 7, 8, 9, 11, 12,
+ 128, 128, 128, 128, 128, 128, 128, 128};
+
+// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
+static const uvec8 kShuf2 = {5, 7, 8, 9, 11, 12, 13, 15,
+ 128, 128, 128, 128, 128, 128, 128, 128};
+
+// Offsets for source bytes 0 to 10
+static const uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10};
+
+// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
+static const uvec8 kShuf11 = {2, 3, 4, 5, 5, 6, 6, 7,
+ 8, 9, 9, 10, 10, 11, 12, 13};
+
+// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
+static const uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10,
+ 10, 11, 12, 13, 13, 14, 14, 15};
+
+// Coefficients for source bytes 0 to 10
+static const uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2};
+
+// Coefficients for source bytes 10 to 21
+static const uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1};
+
+// Coefficients for source bytes 21 to 31
+static const uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3};
+
+// Coefficients for source bytes 21 to 31
+static const vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2};
+
+static const uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128};
+
+static const uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3,
+ 6, 8, 11, 14, 128, 128, 128, 128};
+
+// Arrange words 0,3,6 into 0,1,2
+static const uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128};
+
+// Arrange words 0,3,6 into 3,4,5
+static const uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1,
+ 6, 7, 12, 13, 128, 128, 128, 128};
+
+// Scaling values for boxes of 3x3 and 2x3
+static const uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9,
+ 65536 / 9, 65536 / 6, 0, 0};
+
+// Arrange first value for pixels 0,1,2,3,4,5
+static const uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128,
+ 11, 128, 14, 128, 128, 128, 128, 128};
+
+// Arrange second value for pixels 0,1,2,3,4,5
+static const uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128,
+ 12, 128, 15, 128, 128, 128, 128, 128};
+
+// Arrange third value for pixels 0,1,2,3,4,5
+static const uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128,
+ 13, 128, 128, 128, 128, 128, 128, 128};
+
+// Scaling values for boxes of 3x2 and 2x2
+static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3,
+ 65536 / 3, 65536 / 2, 0, 0};
+
+// Reads 32 pixels, throws half away and writes 16 pixels.
+__declspec(naked) void ScaleRowDown2_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride ignored
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ psrlw xmm0, 8 // isolate odd pixels.
+ psrlw xmm1, 8
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg wloop
+
+ ret
+ }
+}
+
+// Blends 32x1 rectangle to 16x1.
+__declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+
+ pcmpeqb xmm4, xmm4 // constant 0x0101
+ psrlw xmm4, 15
+ packuswb xmm4, xmm4
+ pxor xmm5, xmm5 // constant 0
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ pmaddubsw xmm0, xmm4 // horizontal add
+ pmaddubsw xmm1, xmm4
+ pavgw xmm0, xmm5 // (x + 1) / 2
+ pavgw xmm1, xmm5
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg wloop
+
+ ret
+ }
+}
+
+// Blends 32x2 rectangle to 16x1.
+__declspec(naked) void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_ptr
+ mov esi, [esp + 4 + 8] // src_stride
+ mov edx, [esp + 4 + 12] // dst_ptr
+ mov ecx, [esp + 4 + 16] // dst_width
+
+ pcmpeqb xmm4, xmm4 // constant 0x0101
+ psrlw xmm4, 15
+ packuswb xmm4, xmm4
+ pxor xmm5, xmm5 // constant 0
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + esi]
+ movdqu xmm3, [eax + esi + 16]
+ lea eax, [eax + 32]
+ pmaddubsw xmm0, xmm4 // horizontal add
+ pmaddubsw xmm1, xmm4
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ paddw xmm0, xmm2 // vertical add
+ paddw xmm1, xmm3
+ psrlw xmm0, 1
+ psrlw xmm1, 1
+ pavgw xmm0, xmm5 // (x + 1) / 2
+ pavgw xmm1, xmm5
+ packuswb xmm0, xmm1
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg wloop
+
+ pop esi
+ ret
+ }
+}
+
+#ifdef HAS_SCALEROWDOWN2_AVX2
+// Reads 64 pixels, throws half away and writes 32 pixels.
+__declspec(naked) void ScaleRowDown2_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride ignored
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+
+ wloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpsrlw ymm0, ymm0, 8 // isolate odd pixels.
+ vpsrlw ymm1, ymm1, 8
+ vpackuswb ymm0, ymm0, ymm1
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg wloop
+
+ vzeroupper
+ ret
+ }
+}
+
+// Blends 64x1 rectangle to 32x1.
+__declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+
+ vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b
+ vpsrlw ymm4, ymm4, 15
+ vpackuswb ymm4, ymm4, ymm4
+ vpxor ymm5, ymm5, ymm5 // constant 0
+
+ wloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpmaddubsw ymm0, ymm0, ymm4 // horizontal add
+ vpmaddubsw ymm1, ymm1, ymm4
+ vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2
+ vpavgw ymm1, ymm1, ymm5
+ vpackuswb ymm0, ymm0, ymm1
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg wloop
+
+ vzeroupper
+ ret
+ }
+}
+
+// For rounding, average = (sum + 2) / 4
+// becomes average((sum >> 1), 0)
+// Blends 64x2 rectangle to 32x1.
+__declspec(naked) void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_ptr
+ mov esi, [esp + 4 + 8] // src_stride
+ mov edx, [esp + 4 + 12] // dst_ptr
+ mov ecx, [esp + 4 + 16] // dst_width
+
+ vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b
+ vpsrlw ymm4, ymm4, 15
+ vpackuswb ymm4, ymm4, ymm4
+ vpxor ymm5, ymm5, ymm5 // constant 0
+
+ wloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ vmovdqu ymm2, [eax + esi]
+ vmovdqu ymm3, [eax + esi + 32]
+ lea eax, [eax + 64]
+ vpmaddubsw ymm0, ymm0, ymm4 // horizontal add
+ vpmaddubsw ymm1, ymm1, ymm4
+ vpmaddubsw ymm2, ymm2, ymm4
+ vpmaddubsw ymm3, ymm3, ymm4
+ vpaddw ymm0, ymm0, ymm2 // vertical add
+ vpaddw ymm1, ymm1, ymm3
+ vpsrlw ymm0, ymm0, 1 // (x + 2) / 4 = (x / 2 + 1) / 2
+ vpsrlw ymm1, ymm1, 1
+ vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2
+ vpavgw ymm1, ymm1, ymm5
+ vpackuswb ymm0, ymm0, ymm1
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vmovdqu [edx], ymm0
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg wloop
+
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_SCALEROWDOWN2_AVX2
+
+// Point samples 32 pixels to 8 pixels.
+__declspec(naked) void ScaleRowDown4_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride ignored
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+ pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000
+ psrld xmm5, 24
+ pslld xmm5, 16
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ pand xmm0, xmm5
+ pand xmm1, xmm5
+ packuswb xmm0, xmm1
+ psrlw xmm0, 8
+ packuswb xmm0, xmm0
+ movq qword ptr [edx], xmm0
+ lea edx, [edx + 8]
+ sub ecx, 8
+ jg wloop
+
+ ret
+ }
+}
+
+// Blends 32x4 rectangle to 8x1.
+__declspec(naked) void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_ptr
+ mov esi, [esp + 8 + 8] // src_stride
+ mov edx, [esp + 8 + 12] // dst_ptr
+ mov ecx, [esp + 8 + 16] // dst_width
+ lea edi, [esi + esi * 2] // src_stride * 3
+ pcmpeqb xmm4, xmm4 // constant 0x0101
+ psrlw xmm4, 15
+ movdqa xmm5, xmm4
+ packuswb xmm4, xmm4
+ psllw xmm5, 3 // constant 0x0008
+
+ wloop:
+ movdqu xmm0, [eax] // average rows
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + esi]
+ movdqu xmm3, [eax + esi + 16]
+ pmaddubsw xmm0, xmm4 // horizontal add
+ pmaddubsw xmm1, xmm4
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ paddw xmm0, xmm2 // vertical add rows 0, 1
+ paddw xmm1, xmm3
+ movdqu xmm2, [eax + esi * 2]
+ movdqu xmm3, [eax + esi * 2 + 16]
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ paddw xmm0, xmm2 // add row 2
+ paddw xmm1, xmm3
+ movdqu xmm2, [eax + edi]
+ movdqu xmm3, [eax + edi + 16]
+ lea eax, [eax + 32]
+ pmaddubsw xmm2, xmm4
+ pmaddubsw xmm3, xmm4
+ paddw xmm0, xmm2 // add row 3
+ paddw xmm1, xmm3
+ phaddw xmm0, xmm1
+ paddw xmm0, xmm5 // + 8 for round
+ psrlw xmm0, 4 // /16 for average of 4 * 4
+ packuswb xmm0, xmm0
+ movq qword ptr [edx], xmm0
+ lea edx, [edx + 8]
+ sub ecx, 8
+ jg wloop
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+#ifdef HAS_SCALEROWDOWN4_AVX2
+// Point samples 64 pixels to 16 pixels.
+__declspec(naked) void ScaleRowDown4_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride ignored
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff0000
+ vpsrld ymm5, ymm5, 24
+ vpslld ymm5, ymm5, 16
+
+ wloop:
+ vmovdqu ymm0, [eax]
+ vmovdqu ymm1, [eax + 32]
+ lea eax, [eax + 64]
+ vpand ymm0, ymm0, ymm5
+ vpand ymm1, ymm1, ymm5
+ vpackuswb ymm0, ymm0, ymm1
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vpsrlw ymm0, ymm0, 8
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vmovdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg wloop
+
+ vzeroupper
+ ret
+ }
+}
+
+// Blends 64x4 rectangle to 16x1.
+__declspec(naked) void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ push esi
+ push edi
+ mov eax, [esp + 8 + 4] // src_ptr
+ mov esi, [esp + 8 + 8] // src_stride
+ mov edx, [esp + 8 + 12] // dst_ptr
+ mov ecx, [esp + 8 + 16] // dst_width
+ lea edi, [esi + esi * 2] // src_stride * 3
+ vpcmpeqb ymm4, ymm4, ymm4 // constant 0x0101
+ vpsrlw ymm4, ymm4, 15
+ vpsllw ymm5, ymm4, 3 // constant 0x0008
+ vpackuswb ymm4, ymm4, ymm4
+
+ wloop:
+ vmovdqu ymm0, [eax] // average rows
+ vmovdqu ymm1, [eax + 32]
+ vmovdqu ymm2, [eax + esi]
+ vmovdqu ymm3, [eax + esi + 32]
+ vpmaddubsw ymm0, ymm0, ymm4 // horizontal add
+ vpmaddubsw ymm1, ymm1, ymm4
+ vpmaddubsw ymm2, ymm2, ymm4
+ vpmaddubsw ymm3, ymm3, ymm4
+ vpaddw ymm0, ymm0, ymm2 // vertical add rows 0, 1
+ vpaddw ymm1, ymm1, ymm3
+ vmovdqu ymm2, [eax + esi * 2]
+ vmovdqu ymm3, [eax + esi * 2 + 32]
+ vpmaddubsw ymm2, ymm2, ymm4
+ vpmaddubsw ymm3, ymm3, ymm4
+ vpaddw ymm0, ymm0, ymm2 // add row 2
+ vpaddw ymm1, ymm1, ymm3
+ vmovdqu ymm2, [eax + edi]
+ vmovdqu ymm3, [eax + edi + 32]
+ lea eax, [eax + 64]
+ vpmaddubsw ymm2, ymm2, ymm4
+ vpmaddubsw ymm3, ymm3, ymm4
+ vpaddw ymm0, ymm0, ymm2 // add row 3
+ vpaddw ymm1, ymm1, ymm3
+ vphaddw ymm0, ymm0, ymm1 // mutates
+ vpermq ymm0, ymm0, 0xd8 // unmutate vphaddw
+ vpaddw ymm0, ymm0, ymm5 // + 8 for round
+ vpsrlw ymm0, ymm0, 4 // /32 for average of 4 * 4
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
+ vmovdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 16
+ jg wloop
+
+ pop edi
+ pop esi
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_SCALEROWDOWN4_AVX2
+
+// Point samples 32 pixels to 24 pixels.
+// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
+// Then shuffled to do the scaling.
+
+__declspec(naked) void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride ignored
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+ movdqa xmm3, xmmword ptr kShuf0
+ movdqa xmm4, xmmword ptr kShuf1
+ movdqa xmm5, xmmword ptr kShuf2
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ movdqa xmm2, xmm1
+ palignr xmm1, xmm0, 8
+ pshufb xmm0, xmm3
+ pshufb xmm1, xmm4
+ pshufb xmm2, xmm5
+ movq qword ptr [edx], xmm0
+ movq qword ptr [edx + 8], xmm1
+ movq qword ptr [edx + 16], xmm2
+ lea edx, [edx + 24]
+ sub ecx, 24
+ jg wloop
+
+ ret
+ }
+}
+
+// Blends 32x2 rectangle to 24x1
+// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
+// Then shuffled to do the scaling.
+
+// Register usage:
+// xmm0 src_row 0
+// xmm1 src_row 1
+// xmm2 shuf 0
+// xmm3 shuf 1
+// xmm4 shuf 2
+// xmm5 madd 0
+// xmm6 madd 1
+// xmm7 kRound34
+
+// Note that movdqa+palign may be better than movdqu.
+__declspec(naked) void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_ptr
+ mov esi, [esp + 4 + 8] // src_stride
+ mov edx, [esp + 4 + 12] // dst_ptr
+ mov ecx, [esp + 4 + 16] // dst_width
+ movdqa xmm2, xmmword ptr kShuf01
+ movdqa xmm3, xmmword ptr kShuf11
+ movdqa xmm4, xmmword ptr kShuf21
+ movdqa xmm5, xmmword ptr kMadd01
+ movdqa xmm6, xmmword ptr kMadd11
+ movdqa xmm7, xmmword ptr kRound34
+
+ wloop:
+ movdqu xmm0, [eax] // pixels 0..7
+ movdqu xmm1, [eax + esi]
+ pavgb xmm0, xmm1
+ pshufb xmm0, xmm2
+ pmaddubsw xmm0, xmm5
+ paddsw xmm0, xmm7
+ psrlw xmm0, 2
+ packuswb xmm0, xmm0
+ movq qword ptr [edx], xmm0
+ movdqu xmm0, [eax + 8] // pixels 8..15
+ movdqu xmm1, [eax + esi + 8]
+ pavgb xmm0, xmm1
+ pshufb xmm0, xmm3
+ pmaddubsw xmm0, xmm6
+ paddsw xmm0, xmm7
+ psrlw xmm0, 2
+ packuswb xmm0, xmm0
+ movq qword ptr [edx + 8], xmm0
+ movdqu xmm0, [eax + 16] // pixels 16..23
+ movdqu xmm1, [eax + esi + 16]
+ lea eax, [eax + 32]
+ pavgb xmm0, xmm1
+ pshufb xmm0, xmm4
+ movdqa xmm1, xmmword ptr kMadd21
+ pmaddubsw xmm0, xmm1
+ paddsw xmm0, xmm7
+ psrlw xmm0, 2
+ packuswb xmm0, xmm0
+ movq qword ptr [edx + 16], xmm0
+ lea edx, [edx + 24]
+ sub ecx, 24
+ jg wloop
+
+ pop esi
+ ret
+ }
+}
+
+// Note that movdqa+palign may be better than movdqu.
+__declspec(naked) void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_ptr
+ mov esi, [esp + 4 + 8] // src_stride
+ mov edx, [esp + 4 + 12] // dst_ptr
+ mov ecx, [esp + 4 + 16] // dst_width
+ movdqa xmm2, xmmword ptr kShuf01
+ movdqa xmm3, xmmword ptr kShuf11
+ movdqa xmm4, xmmword ptr kShuf21
+ movdqa xmm5, xmmword ptr kMadd01
+ movdqa xmm6, xmmword ptr kMadd11
+ movdqa xmm7, xmmword ptr kRound34
+
+ wloop:
+ movdqu xmm0, [eax] // pixels 0..7
+ movdqu xmm1, [eax + esi]
+ pavgb xmm1, xmm0
+ pavgb xmm0, xmm1
+ pshufb xmm0, xmm2
+ pmaddubsw xmm0, xmm5
+ paddsw xmm0, xmm7
+ psrlw xmm0, 2
+ packuswb xmm0, xmm0
+ movq qword ptr [edx], xmm0
+ movdqu xmm0, [eax + 8] // pixels 8..15
+ movdqu xmm1, [eax + esi + 8]
+ pavgb xmm1, xmm0
+ pavgb xmm0, xmm1
+ pshufb xmm0, xmm3
+ pmaddubsw xmm0, xmm6
+ paddsw xmm0, xmm7
+ psrlw xmm0, 2
+ packuswb xmm0, xmm0
+ movq qword ptr [edx + 8], xmm0
+ movdqu xmm0, [eax + 16] // pixels 16..23
+ movdqu xmm1, [eax + esi + 16]
+ lea eax, [eax + 32]
+ pavgb xmm1, xmm0
+ pavgb xmm0, xmm1
+ pshufb xmm0, xmm4
+ movdqa xmm1, xmmword ptr kMadd21
+ pmaddubsw xmm0, xmm1
+ paddsw xmm0, xmm7
+ psrlw xmm0, 2
+ packuswb xmm0, xmm0
+ movq qword ptr [edx + 16], xmm0
+ lea edx, [edx+24]
+ sub ecx, 24
+ jg wloop
+
+ pop esi
+ ret
+ }
+}
+
+// 3/8 point sampler
+
+// Scale 32 pixels to 12
+__declspec(naked) void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ // src_stride ignored
+ mov edx, [esp + 12] // dst_ptr
+ mov ecx, [esp + 16] // dst_width
+ movdqa xmm4, xmmword ptr kShuf38a
+ movdqa xmm5, xmmword ptr kShuf38b
+
+ xloop:
+ movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5
+ movdqu xmm1, [eax + 16] // 16 pixels -> 6,7,8,9,10,11
+ lea eax, [eax + 32]
+ pshufb xmm0, xmm4
+ pshufb xmm1, xmm5
+ paddusb xmm0, xmm1
+
+ movq qword ptr [edx], xmm0 // write 12 pixels
+ movhlps xmm1, xmm0
+ movd [edx + 8], xmm1
+ lea edx, [edx + 12]
+ sub ecx, 12
+ jg xloop
+
+ ret
+ }
+}
+
+// Scale 16x3 pixels to 6x1 with interpolation
+__declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_ptr
+ mov esi, [esp + 4 + 8] // src_stride
+ mov edx, [esp + 4 + 12] // dst_ptr
+ mov ecx, [esp + 4 + 16] // dst_width
+ movdqa xmm2, xmmword ptr kShufAc
+ movdqa xmm3, xmmword ptr kShufAc3
+ movdqa xmm4, xmmword ptr kScaleAc33
+ pxor xmm5, xmm5
+
+ xloop:
+ movdqu xmm0, [eax] // sum up 3 rows into xmm0/1
+ movdqu xmm6, [eax + esi]
+ movhlps xmm1, xmm0
+ movhlps xmm7, xmm6
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm6, xmm5
+ punpcklbw xmm7, xmm5
+ paddusw xmm0, xmm6
+ paddusw xmm1, xmm7
+ movdqu xmm6, [eax + esi * 2]
+ lea eax, [eax + 16]
+ movhlps xmm7, xmm6
+ punpcklbw xmm6, xmm5
+ punpcklbw xmm7, xmm5
+ paddusw xmm0, xmm6
+ paddusw xmm1, xmm7
+
+ movdqa xmm6, xmm0 // 8 pixels -> 0,1,2 of xmm6
+ psrldq xmm0, 2
+ paddusw xmm6, xmm0
+ psrldq xmm0, 2
+ paddusw xmm6, xmm0
+ pshufb xmm6, xmm2
+
+ movdqa xmm7, xmm1 // 8 pixels -> 3,4,5 of xmm6
+ psrldq xmm1, 2
+ paddusw xmm7, xmm1
+ psrldq xmm1, 2
+ paddusw xmm7, xmm1
+ pshufb xmm7, xmm3
+ paddusw xmm6, xmm7
+
+ pmulhuw xmm6, xmm4 // divide by 9,9,6, 9,9,6
+ packuswb xmm6, xmm6
+
+ movd [edx], xmm6 // write 6 pixels
+ psrlq xmm6, 16
+ movd [edx + 2], xmm6
+ lea edx, [edx + 6]
+ sub ecx, 6
+ jg xloop
+
+ pop esi
+ ret
+ }
+}
+
+// Scale 16x2 pixels to 6x1 with interpolation
+__declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_ptr
+ mov esi, [esp + 4 + 8] // src_stride
+ mov edx, [esp + 4 + 12] // dst_ptr
+ mov ecx, [esp + 4 + 16] // dst_width
+ movdqa xmm2, xmmword ptr kShufAb0
+ movdqa xmm3, xmmword ptr kShufAb1
+ movdqa xmm4, xmmword ptr kShufAb2
+ movdqa xmm5, xmmword ptr kScaleAb2
+
+ xloop:
+ movdqu xmm0, [eax] // average 2 rows into xmm0
+ movdqu xmm1, [eax + esi]
+ lea eax, [eax + 16]
+ pavgb xmm0, xmm1
+
+ movdqa xmm1, xmm0 // 16 pixels -> 0,1,2,3,4,5 of xmm1
+ pshufb xmm1, xmm2
+ movdqa xmm6, xmm0
+ pshufb xmm6, xmm3
+ paddusw xmm1, xmm6
+ pshufb xmm0, xmm4
+ paddusw xmm1, xmm0
+
+ pmulhuw xmm1, xmm5 // divide by 3,3,2, 3,3,2
+ packuswb xmm1, xmm1
+
+ movd [edx], xmm1 // write 6 pixels
+ psrlq xmm1, 16
+ movd [edx + 2], xmm1
+ lea edx, [edx + 6]
+ sub ecx, 6
+ jg xloop
+
+ pop esi
+ ret
+ }
+}
+
+// Reads 16 bytes and accumulates to 16 shorts at a time.
+__declspec(naked) void ScaleAddRow_SSE2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ mov edx, [esp + 8] // dst_ptr
+ mov ecx, [esp + 12] // src_width
+ pxor xmm5, xmm5
+
+ // sum rows
+ xloop:
+ movdqu xmm3, [eax] // read 16 bytes
+ lea eax, [eax + 16]
+ movdqu xmm0, [edx] // read 16 words from destination
+ movdqu xmm1, [edx + 16]
+ movdqa xmm2, xmm3
+ punpcklbw xmm2, xmm5
+ punpckhbw xmm3, xmm5
+ paddusw xmm0, xmm2 // sum 16 words
+ paddusw xmm1, xmm3
+ movdqu [edx], xmm0 // write 16 words to destination
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
+ sub ecx, 16
+ jg xloop
+ ret
+ }
+}
+
+#ifdef HAS_SCALEADDROW_AVX2
+// Reads 32 bytes and accumulates to 32 shorts at a time.
+__declspec(naked) void ScaleAddRow_AVX2(const uint8_t* src_ptr,
+ uint16_t* dst_ptr,
+ int src_width) {
+ __asm {
+ mov eax, [esp + 4] // src_ptr
+ mov edx, [esp + 8] // dst_ptr
+ mov ecx, [esp + 12] // src_width
+ vpxor ymm5, ymm5, ymm5
+
+ // sum rows
+ xloop:
+ vmovdqu ymm3, [eax] // read 32 bytes
+ lea eax, [eax + 32]
+ vpermq ymm3, ymm3, 0xd8 // unmutate for vpunpck
+ vpunpcklbw ymm2, ymm3, ymm5
+ vpunpckhbw ymm3, ymm3, ymm5
+ vpaddusw ymm0, ymm2, [edx] // sum 16 words
+ vpaddusw ymm1, ymm3, [edx + 32]
+ vmovdqu [edx], ymm0 // write 32 words to destination
+ vmovdqu [edx + 32], ymm1
+ lea edx, [edx + 64]
+ sub ecx, 32
+ jg xloop
+
+ vzeroupper
+ ret
+ }
+}
+#endif // HAS_SCALEADDROW_AVX2
+
+// Constant for making pixels signed to avoid pmaddubsw
+// saturation.
+static const uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
+
+// Constant for making pixels unsigned and adding .5 for rounding.
+static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040,
+ 0x4040, 0x4040, 0x4040, 0x4040};
+
+// Bilinear column filtering. SSSE3 version.
+__declspec(naked) void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ __asm {
+ push ebx
+ push esi
+ push edi
+ mov edi, [esp + 12 + 4] // dst_ptr
+ mov esi, [esp + 12 + 8] // src_ptr
+ mov ecx, [esp + 12 + 12] // dst_width
+ movd xmm2, [esp + 12 + 16] // x
+ movd xmm3, [esp + 12 + 20] // dx
+ mov eax, 0x04040000 // shuffle to line up fractions with pixel.
+ movd xmm5, eax
+ pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
+ psrlw xmm6, 9
+ pcmpeqb xmm7, xmm7 // generate 0x0001
+ psrlw xmm7, 15
+ pextrw eax, xmm2, 1 // get x0 integer. preroll
+ sub ecx, 2
+ jl xloop29
+
+ movdqa xmm0, xmm2 // x1 = x0 + dx
+ paddd xmm0, xmm3
+ punpckldq xmm2, xmm0 // x0 x1
+ punpckldq xmm3, xmm3 // dx dx
+ paddd xmm3, xmm3 // dx * 2, dx * 2
+ pextrw edx, xmm2, 3 // get x1 integer. preroll
+
+ // 2 Pixel loop.
+ xloop2:
+ movdqa xmm1, xmm2 // x0, x1 fractions.
+ paddd xmm2, xmm3 // x += dx
+ movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
+ movd xmm0, ebx
+ psrlw xmm1, 9 // 7 bit fractions.
+ movzx ebx, word ptr [esi + edx] // 2 source x1 pixels
+ movd xmm4, ebx
+ pshufb xmm1, xmm5 // 0011
+ punpcklwd xmm0, xmm4
+ psubb xmm0, xmmword ptr kFsub80 // make pixels signed.
+ pxor xmm1, xmm6 // 0..7f and 7f..0
+ paddusb xmm1, xmm7 // +1 so 0..7f and 80..1
+ pmaddubsw xmm1, xmm0 // 16 bit, 2 pixels.
+ pextrw eax, xmm2, 1 // get x0 integer. next iteration.
+ pextrw edx, xmm2, 3 // get x1 integer. next iteration.
+ paddw xmm1, xmmword ptr kFadd40 // make pixels unsigned and round.
+ psrlw xmm1, 7 // 8.7 fixed point to low 8 bits.
+ packuswb xmm1, xmm1 // 8 bits, 2 pixels.
+ movd ebx, xmm1
+ mov [edi], bx
+ lea edi, [edi + 2]
+ sub ecx, 2 // 2 pixels
+ jge xloop2
+
+ xloop29:
+ add ecx, 2 - 1
+ jl xloop99
+
+ // 1 pixel remainder
+ movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
+ movd xmm0, ebx
+ psrlw xmm2, 9 // 7 bit fractions.
+ pshufb xmm2, xmm5 // 0011
+ psubb xmm0, xmmword ptr kFsub80 // make pixels signed.
+ pxor xmm2, xmm6 // 0..7f and 7f..0
+ paddusb xmm2, xmm7 // +1 so 0..7f and 80..1
+ pmaddubsw xmm2, xmm0 // 16 bit
+ paddw xmm2, xmmword ptr kFadd40 // make pixels unsigned and round.
+ psrlw xmm2, 7 // 8.7 fixed point to low 8 bits.
+ packuswb xmm2, xmm2 // 8 bits
+ movd ebx, xmm2
+ mov [edi], bl
+
+ xloop99:
+
+ pop edi
+ pop esi
+ pop ebx
+ ret
+ }
+}
+
+// Reads 16 pixels, duplicates them and writes 32 pixels.
+__declspec(naked) void ScaleColsUp2_SSE2(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ int dst_width,
+ int x,
+ int dx) {
+ __asm {
+ mov edx, [esp + 4] // dst_ptr
+ mov eax, [esp + 8] // src_ptr
+ mov ecx, [esp + 12] // dst_width
+
+ wloop:
+ movdqu xmm0, [eax]
+ lea eax, [eax + 16]
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm0
+ punpckhbw xmm1, xmm1
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
+ sub ecx, 32
+ jg wloop
+
+ ret
+ }
+}
+
+// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)
+__declspec(naked) void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ // src_stride ignored
+ mov edx, [esp + 12] // dst_argb
+ mov ecx, [esp + 16] // dst_width
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ shufps xmm0, xmm1, 0xdd
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg wloop
+
+ ret
+ }
+}
+
+// Blends 8x1 rectangle to 4x1.
+__declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ __asm {
+ mov eax, [esp + 4] // src_argb
+ // src_stride ignored
+ mov edx, [esp + 12] // dst_argb
+ mov ecx, [esp + 16] // dst_width
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ lea eax, [eax + 32]
+ movdqa xmm2, xmm0
+ shufps xmm0, xmm1, 0x88 // even pixels
+ shufps xmm2, xmm1, 0xdd // odd pixels
+ pavgb xmm0, xmm2
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg wloop
+
+ ret
+ }
+}
+
+// Blends 8x2 rectangle to 4x1.
+__declspec(naked) void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ __asm {
+ push esi
+ mov eax, [esp + 4 + 4] // src_argb
+ mov esi, [esp + 4 + 8] // src_stride
+ mov edx, [esp + 4 + 12] // dst_argb
+ mov ecx, [esp + 4 + 16] // dst_width
+
+ wloop:
+ movdqu xmm0, [eax]
+ movdqu xmm1, [eax + 16]
+ movdqu xmm2, [eax + esi]
+ movdqu xmm3, [eax + esi + 16]
+ lea eax, [eax + 32]
+ pavgb xmm0, xmm2 // average rows
+ pavgb xmm1, xmm3
+ movdqa xmm2, xmm0 // average columns (8 to 4 pixels)
+ shufps xmm0, xmm1, 0x88 // even pixels
+ shufps xmm2, xmm1, 0xdd // odd pixels
+ pavgb xmm0, xmm2
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg wloop
+
+ pop esi
+ ret
+ }
+}
+
+// Reads 4 pixels at a time.
+__declspec(naked) void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ __asm {
+ push ebx
+ push edi
+ mov eax, [esp + 8 + 4] // src_argb
+ // src_stride ignored
+ mov ebx, [esp + 8 + 12] // src_stepx
+ mov edx, [esp + 8 + 16] // dst_argb
+ mov ecx, [esp + 8 + 20] // dst_width
+ lea ebx, [ebx * 4]
+ lea edi, [ebx + ebx * 2]
+
+ wloop:
+ movd xmm0, [eax]
+ movd xmm1, [eax + ebx]
+ punpckldq xmm0, xmm1
+ movd xmm2, [eax + ebx * 2]
+ movd xmm3, [eax + edi]
+ lea eax, [eax + ebx * 4]
+ punpckldq xmm2, xmm3
+ punpcklqdq xmm0, xmm2
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg wloop
+
+ pop edi
+ pop ebx
+ ret
+ }
+}
+
+// Blends four 2x2 to 4x1.
+__declspec(naked) void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ __asm {
+ push ebx
+ push esi
+ push edi
+ mov eax, [esp + 12 + 4] // src_argb
+ mov esi, [esp + 12 + 8] // src_stride
+ mov ebx, [esp + 12 + 12] // src_stepx
+ mov edx, [esp + 12 + 16] // dst_argb
+ mov ecx, [esp + 12 + 20] // dst_width
+ lea esi, [eax + esi] // row1 pointer
+ lea ebx, [ebx * 4]
+ lea edi, [ebx + ebx * 2]
+
+ wloop:
+ movq xmm0, qword ptr [eax] // row0 4 pairs
+ movhps xmm0, qword ptr [eax + ebx]
+ movq xmm1, qword ptr [eax + ebx * 2]
+ movhps xmm1, qword ptr [eax + edi]
+ lea eax, [eax + ebx * 4]
+ movq xmm2, qword ptr [esi] // row1 4 pairs
+ movhps xmm2, qword ptr [esi + ebx]
+ movq xmm3, qword ptr [esi + ebx * 2]
+ movhps xmm3, qword ptr [esi + edi]
+ lea esi, [esi + ebx * 4]
+ pavgb xmm0, xmm2 // average rows
+ pavgb xmm1, xmm3
+ movdqa xmm2, xmm0 // average columns (8 to 4 pixels)
+ shufps xmm0, xmm1, 0x88 // even pixels
+ shufps xmm2, xmm1, 0xdd // odd pixels
+ pavgb xmm0, xmm2
+ movdqu [edx], xmm0
+ lea edx, [edx + 16]
+ sub ecx, 4
+ jg wloop
+
+ pop edi
+ pop esi
+ pop ebx
+ ret
+ }
+}
+
+// Column scaling unfiltered. SSE2 version.
+__declspec(naked) void ScaleARGBCols_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ __asm {
+ push edi
+ push esi
+ mov edi, [esp + 8 + 4] // dst_argb
+ mov esi, [esp + 8 + 8] // src_argb
+ mov ecx, [esp + 8 + 12] // dst_width
+ movd xmm2, [esp + 8 + 16] // x
+ movd xmm3, [esp + 8 + 20] // dx
+
+ pshufd xmm2, xmm2, 0 // x0 x0 x0 x0
+ pshufd xmm0, xmm3, 0x11 // dx 0 dx 0
+ paddd xmm2, xmm0
+ paddd xmm3, xmm3 // 0, 0, 0, dx * 2
+ pshufd xmm0, xmm3, 0x05 // dx * 2, dx * 2, 0, 0
+ paddd xmm2, xmm0 // x3 x2 x1 x0
+ paddd xmm3, xmm3 // 0, 0, 0, dx * 4
+ pshufd xmm3, xmm3, 0 // dx * 4, dx * 4, dx * 4, dx * 4
+
+ pextrw eax, xmm2, 1 // get x0 integer.
+ pextrw edx, xmm2, 3 // get x1 integer.
+
+ cmp ecx, 0
+ jle xloop99
+ sub ecx, 4
+ jl xloop49
+
+ // 4 Pixel loop.
+ xloop4:
+ movd xmm0, [esi + eax * 4] // 1 source x0 pixels
+ movd xmm1, [esi + edx * 4] // 1 source x1 pixels
+ pextrw eax, xmm2, 5 // get x2 integer.
+ pextrw edx, xmm2, 7 // get x3 integer.
+ paddd xmm2, xmm3 // x += dx
+ punpckldq xmm0, xmm1 // x0 x1
+
+ movd xmm1, [esi + eax * 4] // 1 source x2 pixels
+ movd xmm4, [esi + edx * 4] // 1 source x3 pixels
+ pextrw eax, xmm2, 1 // get x0 integer. next iteration.
+ pextrw edx, xmm2, 3 // get x1 integer. next iteration.
+ punpckldq xmm1, xmm4 // x2 x3
+ punpcklqdq xmm0, xmm1 // x0 x1 x2 x3
+ movdqu [edi], xmm0
+ lea edi, [edi + 16]
+ sub ecx, 4 // 4 pixels
+ jge xloop4
+
+ xloop49:
+ test ecx, 2
+ je xloop29
+
+ // 2 Pixels.
+ movd xmm0, [esi + eax * 4] // 1 source x0 pixels
+ movd xmm1, [esi + edx * 4] // 1 source x1 pixels
+ pextrw eax, xmm2, 5 // get x2 integer.
+ punpckldq xmm0, xmm1 // x0 x1
+
+ movq qword ptr [edi], xmm0
+ lea edi, [edi + 8]
+
+ xloop29:
+ test ecx, 1
+ je xloop99
+
+ // 1 Pixels.
+ movd xmm0, [esi + eax * 4] // 1 source x2 pixels
+ movd dword ptr [edi], xmm0
+ xloop99:
+
+ pop esi
+ pop edi
+ ret
+ }
+}
+
+// Bilinear row filtering combines 2x1 -> 1x1. SSSE3 version.
+// TODO(fbarchard): Port to Neon
+
+// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
+static const uvec8 kShuffleColARGB = {
+ 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel
+ 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel
+};
+
+// Shuffle table for duplicating 2 fractions into 8 bytes each
+static const uvec8 kShuffleFractions = {
+ 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
+};
+
+__declspec(naked) void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ __asm {
+ push esi
+ push edi
+ mov edi, [esp + 8 + 4] // dst_argb
+ mov esi, [esp + 8 + 8] // src_argb
+ mov ecx, [esp + 8 + 12] // dst_width
+ movd xmm2, [esp + 8 + 16] // x
+ movd xmm3, [esp + 8 + 20] // dx
+ movdqa xmm4, xmmword ptr kShuffleColARGB
+ movdqa xmm5, xmmword ptr kShuffleFractions
+ pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
+ psrlw xmm6, 9
+ pextrw eax, xmm2, 1 // get x0 integer. preroll
+ sub ecx, 2
+ jl xloop29
+
+ movdqa xmm0, xmm2 // x1 = x0 + dx
+ paddd xmm0, xmm3
+ punpckldq xmm2, xmm0 // x0 x1
+ punpckldq xmm3, xmm3 // dx dx
+ paddd xmm3, xmm3 // dx * 2, dx * 2
+ pextrw edx, xmm2, 3 // get x1 integer. preroll
+
+ // 2 Pixel loop.
+ xloop2:
+ movdqa xmm1, xmm2 // x0, x1 fractions.
+ paddd xmm2, xmm3 // x += dx
+ movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels
+ psrlw xmm1, 9 // 7 bit fractions.
+ movhps xmm0, qword ptr [esi + edx * 4] // 2 source x1 pixels
+ pshufb xmm1, xmm5 // 0000000011111111
+ pshufb xmm0, xmm4 // arrange pixels into pairs
+ pxor xmm1, xmm6 // 0..7f and 7f..0
+ pmaddubsw xmm0, xmm1 // argb_argb 16 bit, 2 pixels.
+ pextrw eax, xmm2, 1 // get x0 integer. next iteration.
+ pextrw edx, xmm2, 3 // get x1 integer. next iteration.
+ psrlw xmm0, 7 // argb 8.7 fixed point to low 8 bits.
+ packuswb xmm0, xmm0 // argb_argb 8 bits, 2 pixels.
+ movq qword ptr [edi], xmm0
+ lea edi, [edi + 8]
+ sub ecx, 2 // 2 pixels
+ jge xloop2
+
+ xloop29:
+
+ add ecx, 2 - 1
+ jl xloop99
+
+ // 1 pixel remainder
+ psrlw xmm2, 9 // 7 bit fractions.
+ movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels
+ pshufb xmm2, xmm5 // 00000000
+ pshufb xmm0, xmm4 // arrange pixels into pairs
+ pxor xmm2, xmm6 // 0..7f and 7f..0
+ pmaddubsw xmm0, xmm2 // argb 16 bit, 1 pixel.
+ psrlw xmm0, 7
+ packuswb xmm0, xmm0 // argb 8 bits, 1 pixel.
+ movd [edi], xmm0
+
+ xloop99:
+
+ pop edi
+ pop esi
+ ret
+ }
+}
+
+// Reads 4 pixels, duplicates them and writes 8 pixels.
+__declspec(naked) void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb,
+ const uint8_t* src_argb,
+ int dst_width,
+ int x,
+ int dx) {
+ __asm {
+ mov edx, [esp + 4] // dst_argb
+ mov eax, [esp + 8] // src_argb
+ mov ecx, [esp + 12] // dst_width
+
+ wloop:
+ movdqu xmm0, [eax]
+ lea eax, [eax + 16]
+ movdqa xmm1, xmm0
+ punpckldq xmm0, xmm0
+ punpckhdq xmm1, xmm1
+ movdqu [edx], xmm0
+ movdqu [edx + 16], xmm1
+ lea edx, [edx + 32]
+ sub ecx, 8
+ jg wloop
+
+ ret
+ }
+}
+
+// Divide num by div and return as 16.16 fixed point result.
+__declspec(naked) int FixedDiv_X86(int num, int div) {
+ __asm {
+ mov eax, [esp + 4] // num
+ cdq // extend num to 64 bits
+ shld edx, eax, 16 // 32.16
+ shl eax, 16
+ idiv dword ptr [esp + 8]
+ ret
+ }
+}
+
+// Divide num by div and return as 16.16 fixed point result.
+__declspec(naked) int FixedDiv1_X86(int num, int div) {
+ __asm {
+ mov eax, [esp + 4] // num
+ mov ecx, [esp + 8] // denom
+ cdq // extend num to 64 bits
+ shld edx, eax, 16 // 32.16
+ shl eax, 16
+ sub eax, 0x00010001
+ sbb edx, 0
+ sub ecx, 1
+ idiv ecx
+ ret
+ }
+}
+#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/video_common.cc b/media/libvpx/libvpx/third_party/libyuv/source/video_common.cc
new file mode 100644
index 0000000000..92384c050c
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/source/video_common.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/video_common.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+struct FourCCAliasEntry {
+ uint32_t alias;
+ uint32_t canonical;
+};
+
+#define NUM_ALIASES 18
+static const struct FourCCAliasEntry kFourCCAliases[NUM_ALIASES] = {
+ {FOURCC_IYUV, FOURCC_I420},
+ {FOURCC_YU12, FOURCC_I420},
+ {FOURCC_YU16, FOURCC_I422},
+ {FOURCC_YU24, FOURCC_I444},
+ {FOURCC_YUYV, FOURCC_YUY2},
+ {FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs
+ {FOURCC_HDYC, FOURCC_UYVY},
+ {FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
+ {FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
+ {FOURCC_DMB1, FOURCC_MJPG},
+ {FOURCC_BA81, FOURCC_BGGR}, // deprecated.
+ {FOURCC_RGB3, FOURCC_RAW},
+ {FOURCC_BGR3, FOURCC_24BG},
+ {FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB
+ {FOURCC_CM24, FOURCC_RAW}, // kCMPixelFormat_24RGB
+ {FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555
+ {FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565
+ {FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551
+};
+// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
+// {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA
+
+LIBYUV_API
+uint32_t CanonicalFourCC(uint32_t fourcc) {
+ int i;
+ for (i = 0; i < NUM_ALIASES; ++i) {
+ if (kFourCCAliases[i].alias == fourcc) {
+ return kFourCCAliases[i].canonical;
+ }
+ }
+ // Not an alias, so return it as-is.
+ return fourcc;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/media/libvpx/libvpx/third_party/x86inc/LICENSE b/media/libvpx/libvpx/third_party/x86inc/LICENSE
new file mode 100644
index 0000000000..7d07645a17
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/x86inc/LICENSE
@@ -0,0 +1,18 @@
+Copyright (C) 2005-2012 x264 project
+
+Authors: Loren Merritt <lorenm@u.washington.edu>
+ Anton Mitrofanov <BugMaster@narod.ru>
+ Jason Garrett-Glaser <darkshikari@gmail.com>
+ Henrik Gramner <hengar-6@student.ltu.se>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/media/libvpx/libvpx/third_party/x86inc/README.libvpx b/media/libvpx/libvpx/third_party/x86inc/README.libvpx
new file mode 100644
index 0000000000..195654f7bb
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/x86inc/README.libvpx
@@ -0,0 +1,19 @@
+URL: https://git.videolan.org/git/x264.git
+Version: 3e5aed95cc470f37e2db3e6506a8deb89b527720
+License: ISC
+License File: LICENSE
+
+Description:
+x264/libav's framework for x86 assembly. Contains a variety of macros and
+defines that help automatically allow assembly to work cross-platform.
+
+Local Modifications:
+Get configuration from vpx_config.asm.
+Prefix functions with vpx by default.
+Manage name mangling (prefixing with '_') manually because 'PREFIX' does not
+ exist in libvpx.
+Copy PIC 'GLOBAL' macros from x86_abi_support.asm
+Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
+Use .text with no alignment for aout.
+Only use 'hidden' visibility with Chromium.
+Prefix ARCH_* with VPX_.
diff --git a/media/libvpx/libvpx/third_party/x86inc/x86inc.asm b/media/libvpx/libvpx/third_party/x86inc/x86inc.asm
new file mode 100644
index 0000000000..3d55e921c7
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/x86inc/x86inc.asm
@@ -0,0 +1,1923 @@
+;*****************************************************************************
+;* x86inc.asm: x264asm abstraction layer
+;*****************************************************************************
+;* Copyright (C) 2005-2019 x264 project
+;*
+;* Authors: Loren Merritt <lorenm@u.washington.edu>
+;* Henrik Gramner <henrik@gramner.com>
+;* Anton Mitrofanov <BugMaster@narod.ru>
+;* Fiona Glaser <fiona@x264.com>
+;*
+;* Permission to use, copy, modify, and/or distribute this software for any
+;* purpose with or without fee is hereby granted, provided that the above
+;* copyright notice and this permission notice appear in all copies.
+;*
+;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+;*****************************************************************************
+
+; This is a header file for the x264ASM assembly language, which uses
+; NASM/YASM syntax combined with a large number of macros to provide easy
+; abstraction between different calling conventions (x86_32, win64, linux64).
+; It also has various other useful features to simplify writing the kind of
+; DSP functions that are most often used in x264.
+
+; Unlike the rest of x264, this file is available under an ISC license, as it
+; has significant usefulness outside of x264 and we want it to be available
+; to the largest audience possible. Of course, if you modify it for your own
+; purposes to add a new feature, we strongly encourage contributing a patch
+; as this feature might be useful for others as well. Send patches or ideas
+; to x264-devel@videolan.org .
+
+%include "vpx_config.asm"
+
+%ifndef private_prefix
+ %define private_prefix vpx
+%endif
+
+%ifndef public_prefix
+ %define public_prefix private_prefix
+%endif
+
+%ifndef STACK_ALIGNMENT
+ %if VPX_ARCH_X86_64
+ %define STACK_ALIGNMENT 16
+ %else
+ %define STACK_ALIGNMENT 4
+ %endif
+%endif
+
+%define WIN64 0
+%define UNIX64 0
+%if VPX_ARCH_X86_64
+ %ifidn __OUTPUT_FORMAT__,win32
+ %define WIN64 1
+ %elifidn __OUTPUT_FORMAT__,win64
+ %define WIN64 1
+ %elifidn __OUTPUT_FORMAT__,x64
+ %define WIN64 1
+ %else
+ %define UNIX64 1
+ %endif
+%endif
+
+%define FORMAT_ELF 0
+%define FORMAT_MACHO 0
+%ifidn __OUTPUT_FORMAT__,elf
+ %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf32
+ %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf64
+ %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,macho
+ %define FORMAT_MACHO 1
+%elifidn __OUTPUT_FORMAT__,macho32
+ %define FORMAT_MACHO 1
+%elifidn __OUTPUT_FORMAT__,macho64
+ %define FORMAT_MACHO 1
+%endif
+
+; Set PREFIX for libvpx builds.
+%if FORMAT_ELF
+ %undef PREFIX
+%elif WIN64
+ %undef PREFIX
+%else
+ %define PREFIX
+%endif
+
+%ifdef PREFIX
+ %define mangle(x) _ %+ x
+%else
+ %define mangle(x) x
+%endif
+
+; In some instances macho32 tables get misaligned when using .rodata.
+; When looking at the disassembly it appears that the offset is either
+; correct or consistently off by 90. Placing them in the .text section
+; works around the issue. It appears to be specific to the way libvpx
+; handles the tables.
+%macro SECTION_RODATA 0-1 16
+ %ifidn __OUTPUT_FORMAT__,win32
+ SECTION .rdata align=%1
+ %elif WIN64
+ SECTION .rdata align=%1
+ %elifidn __OUTPUT_FORMAT__,macho32
+ SECTION .text align=%1
+ fakegot:
+ %elifidn __OUTPUT_FORMAT__,aout
+ SECTION .text
+ %else
+ SECTION .rodata align=%1
+ %endif
+%endmacro
+
+; PIC macros from vpx_ports/x86_abi_support.asm.
+%ifidn __OUTPUT_FORMAT__,elf32
+%define ABI_IS_32BIT 1
+%elifidn __OUTPUT_FORMAT__,macho32
+%define ABI_IS_32BIT 1
+%elifidn __OUTPUT_FORMAT__,win32
+%define ABI_IS_32BIT 1
+%elifidn __OUTPUT_FORMAT__,aout
+%define ABI_IS_32BIT 1
+%else
+%define ABI_IS_32BIT 0
+%endif
+
+%if ABI_IS_32BIT
+ %if CONFIG_PIC=1
+ %ifidn __OUTPUT_FORMAT__,elf32
+ %define GET_GOT_DEFINED 1
+ %define WRT_PLT wrt ..plt
+ %macro GET_GOT 1
+ extern _GLOBAL_OFFSET_TABLE_
+ push %1
+ call %%get_got
+ %%sub_offset:
+ jmp %%exitGG
+ %%get_got:
+ mov %1, [esp]
+ add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
+ ret
+ %%exitGG:
+ %undef GLOBAL
+ %define GLOBAL(x) x + %1 wrt ..gotoff
+ %undef RESTORE_GOT
+ %define RESTORE_GOT pop %1
+ %endmacro
+ %elifidn __OUTPUT_FORMAT__,macho32
+ %define GET_GOT_DEFINED 1
+ %macro GET_GOT 1
+ push %1
+ call %%get_got
+ %%get_got:
+ pop %1
+ %undef GLOBAL
+ %define GLOBAL(x) x + %1 - %%get_got
+ %undef RESTORE_GOT
+ %define RESTORE_GOT pop %1
+ %endmacro
+ %else
+ %define GET_GOT_DEFINED 0
+ %endif
+ %endif
+
+ %if VPX_ARCH_X86_64 == 0
+ %undef PIC
+ %endif
+
+%else
+ %macro GET_GOT 1
+ %endmacro
+ %define GLOBAL(x) rel x
+ %define WRT_PLT wrt ..plt
+
+ %if WIN64
+ %define PIC
+ %elifidn __OUTPUT_FORMAT__,macho64
+ %define PIC
+ %elif CONFIG_PIC
+ %define PIC
+ %endif
+%endif
+
+%ifnmacro GET_GOT
+ %macro GET_GOT 1
+ %endmacro
+ %define GLOBAL(x) x
+%endif
+%ifndef RESTORE_GOT
+ %define RESTORE_GOT
+%endif
+%ifndef WRT_PLT
+ %define WRT_PLT
+%endif
+
+%ifdef PIC
+ default rel
+%endif
+
+%ifndef GET_GOT_DEFINED
+ %define GET_GOT_DEFINED 0
+%endif
+; End PIC macros from vpx_ports/x86_abi_support.asm.
+
+; libvpx explicitly sets visibilty in shared object builds. Avoid setting
+; visibility to hidden as it may break builds that split sources on e.g.,
+; directory boundaries.
+%ifdef CHROMIUM
+ %define VISIBILITY hidden
+ %define HAVE_PRIVATE_EXTERN 1
+%else
+ %define VISIBILITY
+ %define HAVE_PRIVATE_EXTERN 0
+%endif
+
+%ifdef __NASM_VER__
+ %use smartalign
+ %if __NASM_VERSION_ID__ < 0x020e0000 ; 2.14
+ %define HAVE_PRIVATE_EXTERN 0
+ %endif
+%endif
+
+; Macros to eliminate most code duplication between x86_32 and x86_64:
+; Currently this works only for leaf functions which load all their arguments
+; into registers at the start, and make no other use of the stack. Luckily that
+; covers most of x264's asm.
+
+; PROLOGUE:
+; %1 = number of arguments. loads them from stack if needed.
+; %2 = number of registers used. pushes callee-saved regs if needed.
+; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
+; %4 = (optional) stack size to be allocated. The stack will be aligned before
+; allocating the specified stack size. If the required stack alignment is
+; larger than the known stack alignment the stack will be manually aligned
+; and an extra register will be allocated to hold the original stack
+; pointer (to not invalidate r0m etc.). To prevent the use of an extra
+; register as stack pointer, request a negative stack size.
+; %4+/%5+ = list of names to define to registers
+; PROLOGUE can also be invoked by adding the same options to cglobal
+
+; e.g.
+; cglobal foo, 2,3,7,0x40, dst, src, tmp
+; declares a function (foo) that automatically loads two arguments (dst and
+; src) into registers, uses one additional register (tmp) plus 7 vector
+; registers (m0-m6) and allocates 0x40 bytes of stack space.
+
+; TODO Some functions can use some args directly from the stack. If they're the
+; last args then you can just not declare them, but if they're in the middle
+; we need more flexible macro.
+
+; RET:
+; Pops anything that was pushed by PROLOGUE, and returns.
+
+; REP_RET:
+; Use this instead of RET if it's a branch target.
+
+; registers:
+; rN and rNq are the native-size register holding function argument N
+; rNd, rNw, rNb are dword, word, and byte size
+; rNh is the high 8 bits of the word size
+; rNm is the original location of arg N (a register or on the stack), dword
+; rNmp is native size
+
+%macro DECLARE_REG 2-3
+ %define r%1q %2
+ %define r%1d %2d
+ %define r%1w %2w
+ %define r%1b %2b
+ %define r%1h %2h
+ %define %2q %2
+ %if %0 == 2
+ %define r%1m %2d
+ %define r%1mp %2
+ %elif VPX_ARCH_X86_64 ; memory
+ %define r%1m [rstk + stack_offset + %3]
+ %define r%1mp qword r %+ %1 %+ m
+ %else
+ %define r%1m [rstk + stack_offset + %3]
+ %define r%1mp dword r %+ %1 %+ m
+ %endif
+ %define r%1 %2
+%endmacro
+
+%macro DECLARE_REG_SIZE 3
+ %define r%1q r%1
+ %define e%1q r%1
+ %define r%1d e%1
+ %define e%1d e%1
+ %define r%1w %1
+ %define e%1w %1
+ %define r%1h %3
+ %define e%1h %3
+ %define r%1b %2
+ %define e%1b %2
+ %if VPX_ARCH_X86_64 == 0
+ %define r%1 e%1
+ %endif
+%endmacro
+
+DECLARE_REG_SIZE ax, al, ah
+DECLARE_REG_SIZE bx, bl, bh
+DECLARE_REG_SIZE cx, cl, ch
+DECLARE_REG_SIZE dx, dl, dh
+DECLARE_REG_SIZE si, sil, null
+DECLARE_REG_SIZE di, dil, null
+DECLARE_REG_SIZE bp, bpl, null
+
+; t# defines for when per-arch register allocation is more complex than just function arguments
+
+%macro DECLARE_REG_TMP 1-*
+ %assign %%i 0
+ %rep %0
+ CAT_XDEFINE t, %%i, r%1
+ %assign %%i %%i+1
+ %rotate 1
+ %endrep
+%endmacro
+
+%macro DECLARE_REG_TMP_SIZE 0-*
+ %rep %0
+ %define t%1q t%1 %+ q
+ %define t%1d t%1 %+ d
+ %define t%1w t%1 %+ w
+ %define t%1h t%1 %+ h
+ %define t%1b t%1 %+ b
+ %rotate 1
+ %endrep
+%endmacro
+
+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
+
+%if VPX_ARCH_X86_64
+ %define gprsize 8
+%else
+ %define gprsize 4
+%endif
+
+%macro LEA 2
+%if VPX_ARCH_X86_64
+ lea %1, [%2]
+%elif PIC
+ call $+5 ; special-cased to not affect the RSB on most CPU:s
+ pop %1
+ add %1, (%2)-$+1
+%else
+ mov %1, %2
+%endif
+%endmacro
+
+%macro PUSH 1
+ push %1
+ %ifidn rstk, rsp
+ %assign stack_offset stack_offset+gprsize
+ %endif
+%endmacro
+
+%macro POP 1
+ pop %1
+ %ifidn rstk, rsp
+ %assign stack_offset stack_offset-gprsize
+ %endif
+%endmacro
+
+%macro PUSH_IF_USED 1-*
+ %rep %0
+ %if %1 < regs_used
+ PUSH r%1
+ %endif
+ %rotate 1
+ %endrep
+%endmacro
+
+%macro POP_IF_USED 1-*
+ %rep %0
+ %if %1 < regs_used
+ pop r%1
+ %endif
+ %rotate 1
+ %endrep
+%endmacro
+
+%macro LOAD_IF_USED 1-*
+ %rep %0
+ %if %1 < num_args
+ mov r%1, r %+ %1 %+ mp
+ %endif
+ %rotate 1
+ %endrep
+%endmacro
+
+%macro SUB 2
+ sub %1, %2
+ %ifidn %1, rstk
+ %assign stack_offset stack_offset+(%2)
+ %endif
+%endmacro
+
+%macro ADD 2
+ add %1, %2
+ %ifidn %1, rstk
+ %assign stack_offset stack_offset-(%2)
+ %endif
+%endmacro
+
+%macro movifnidn 2
+ %ifnidn %1, %2
+ mov %1, %2
+ %endif
+%endmacro
+
+%if VPX_ARCH_X86_64 == 0
+ %define movsxd movifnidn
+%endif
+
+%macro movsxdifnidn 2
+ %ifnidn %1, %2
+ movsxd %1, %2
+ %endif
+%endmacro
+
+%macro ASSERT 1
+ %if (%1) == 0
+ %error assertion ``%1'' failed
+ %endif
+%endmacro
+
+%macro DEFINE_ARGS 0-*
+ %ifdef n_arg_names
+ %assign %%i 0
+ %rep n_arg_names
+ CAT_UNDEF arg_name %+ %%i, q
+ CAT_UNDEF arg_name %+ %%i, d
+ CAT_UNDEF arg_name %+ %%i, w
+ CAT_UNDEF arg_name %+ %%i, h
+ CAT_UNDEF arg_name %+ %%i, b
+ CAT_UNDEF arg_name %+ %%i, m
+ CAT_UNDEF arg_name %+ %%i, mp
+ CAT_UNDEF arg_name, %%i
+ %assign %%i %%i+1
+ %endrep
+ %endif
+
+ %xdefine %%stack_offset stack_offset
+ %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
+ %assign %%i 0
+ %rep %0
+ %xdefine %1q r %+ %%i %+ q
+ %xdefine %1d r %+ %%i %+ d
+ %xdefine %1w r %+ %%i %+ w
+ %xdefine %1h r %+ %%i %+ h
+ %xdefine %1b r %+ %%i %+ b
+ %xdefine %1m r %+ %%i %+ m
+ %xdefine %1mp r %+ %%i %+ mp
+ CAT_XDEFINE arg_name, %%i, %1
+ %assign %%i %%i+1
+ %rotate 1
+ %endrep
+ %xdefine stack_offset %%stack_offset
+ %assign n_arg_names %0
+%endmacro
+
+%define required_stack_alignment ((mmsize + 15) & ~15)
+%define vzeroupper_required (mmsize > 16 && (VPX_ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512)))
+%define high_mm_regs (16*cpuflag(avx512))
+
+%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
+ %ifnum %1
+ %if %1 != 0
+ %assign %%pad 0
+ %assign stack_size %1
+ %if stack_size < 0
+ %assign stack_size -stack_size
+ %endif
+ %if WIN64
+ %assign %%pad %%pad + 32 ; shadow space
+ %if mmsize != 8
+ %assign xmm_regs_used %2
+ %if xmm_regs_used > 8
+ %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
+ %endif
+ %endif
+ %endif
+ %if required_stack_alignment <= STACK_ALIGNMENT
+ ; maintain the current stack alignment
+ %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
+ SUB rsp, stack_size_padded
+ %else
+ %assign %%reg_num (regs_used - 1)
+ %xdefine rstk r %+ %%reg_num
+ ; align stack, and save original stack location directly above
+ ; it, i.e. in [rsp+stack_size_padded], so we can restore the
+ ; stack in a single instruction (i.e. mov rsp, rstk or mov
+ ; rsp, [rsp+stack_size_padded])
+ %if %1 < 0 ; need to store rsp on stack
+ %xdefine rstkm [rsp + stack_size + %%pad]
+ %assign %%pad %%pad + gprsize
+ %else ; can keep rsp in rstk during whole function
+ %xdefine rstkm rstk
+ %endif
+ %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
+ mov rstk, rsp
+ and rsp, ~(required_stack_alignment-1)
+ sub rsp, stack_size_padded
+ movifnidn rstkm, rstk
+ %endif
+ WIN64_PUSH_XMM
+ %endif
+ %endif
+%endmacro
+
+%macro SETUP_STACK_POINTER 1
+ %ifnum %1
+ %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
+ %if %1 > 0
+ ; Reserve an additional register for storing the original stack pointer, but avoid using
+ ; eax/rax for this purpose since it can potentially get overwritten as a return value.
+ %assign regs_used (regs_used + 1)
+ %if VPX_ARCH_X86_64 && regs_used == 7
+ %assign regs_used 8
+ %elif VPX_ARCH_X86_64 == 0 && regs_used == 1
+ %assign regs_used 2
+ %endif
+ %endif
+ %if VPX_ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
+ ; Ensure that we don't clobber any registers containing arguments. For UNIX64 we also preserve r6 (rax)
+ ; since it's used as a hidden argument in vararg functions to specify the number of vector registers used.
+ %assign regs_used 5 + UNIX64 * 3
+ %endif
+ %endif
+ %endif
+%endmacro
+
+%macro DEFINE_ARGS_INTERNAL 3+
+ %ifnum %2
+ DEFINE_ARGS %3
+ %elif %1 == 4
+ DEFINE_ARGS %2
+ %elif %1 > 4
+ DEFINE_ARGS %2, %3
+ %endif
+%endmacro
+
+%if WIN64 ; Windows x64 ;=================================================
+
+DECLARE_REG 0, rcx
+DECLARE_REG 1, rdx
+DECLARE_REG 2, R8
+DECLARE_REG 3, R9
+DECLARE_REG 4, R10, 40
+DECLARE_REG 5, R11, 48
+DECLARE_REG 6, rax, 56
+DECLARE_REG 7, rdi, 64
+DECLARE_REG 8, rsi, 72
+DECLARE_REG 9, rbx, 80
+DECLARE_REG 10, rbp, 88
+DECLARE_REG 11, R14, 96
+DECLARE_REG 12, R15, 104
+DECLARE_REG 13, R12, 112
+DECLARE_REG 14, R13, 120
+
+%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
+ %assign num_args %1
+ %assign regs_used %2
+ ASSERT regs_used >= num_args
+ SETUP_STACK_POINTER %4
+ ASSERT regs_used <= 15
+ PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
+ ALLOC_STACK %4, %3
+ %if mmsize != 8 && stack_size == 0
+ WIN64_SPILL_XMM %3
+ %endif
+ LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
+ DEFINE_ARGS_INTERNAL %0, %4, %5
+%endmacro
+
+%macro WIN64_PUSH_XMM 0
+ ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
+ %if xmm_regs_used > 6 + high_mm_regs
+ movaps [rstk + stack_offset + 8], xmm6
+ %endif
+ %if xmm_regs_used > 7 + high_mm_regs
+ movaps [rstk + stack_offset + 24], xmm7
+ %endif
+ %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
+ %if %%xmm_regs_on_stack > 0
+ %assign %%i 8
+ %rep %%xmm_regs_on_stack
+ movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
+ %assign %%i %%i+1
+ %endrep
+ %endif
+%endmacro
+
+%macro WIN64_SPILL_XMM 1
+ %assign xmm_regs_used %1
+ ASSERT xmm_regs_used <= 16 + high_mm_regs
+ %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
+ %if %%xmm_regs_on_stack > 0
+ ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
+ %assign %%pad %%xmm_regs_on_stack*16 + 32
+ %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
+ SUB rsp, stack_size_padded
+ %endif
+ WIN64_PUSH_XMM
+%endmacro
+
+%macro WIN64_RESTORE_XMM_INTERNAL 0
+ %assign %%pad_size 0
+ %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
+ %if %%xmm_regs_on_stack > 0
+ %assign %%i xmm_regs_used - high_mm_regs
+ %rep %%xmm_regs_on_stack
+ %assign %%i %%i-1
+ movaps xmm %+ %%i, [rsp + (%%i-8)*16 + stack_size + 32]
+ %endrep
+ %endif
+ %if stack_size_padded > 0
+ %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
+ mov rsp, rstkm
+ %else
+ add rsp, stack_size_padded
+ %assign %%pad_size stack_size_padded
+ %endif
+ %endif
+ %if xmm_regs_used > 7 + high_mm_regs
+ movaps xmm7, [rsp + stack_offset - %%pad_size + 24]
+ %endif
+ %if xmm_regs_used > 6 + high_mm_regs
+ movaps xmm6, [rsp + stack_offset - %%pad_size + 8]
+ %endif
+%endmacro
+
+%macro WIN64_RESTORE_XMM 0
+ WIN64_RESTORE_XMM_INTERNAL
+ %assign stack_offset (stack_offset-stack_size_padded)
+ %assign stack_size_padded 0
+ %assign xmm_regs_used 0
+%endmacro
+
+%define has_epilogue regs_used > 7 || stack_size > 0 || vzeroupper_required || xmm_regs_used > 6+high_mm_regs
+
+%macro RET 0
+ WIN64_RESTORE_XMM_INTERNAL
+ POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
+ %if vzeroupper_required
+ vzeroupper
+ %endif
+ AUTO_REP_RET
+%endmacro
+
+%elif VPX_ARCH_X86_64 ; *nix x64 ;=============================================
+
+DECLARE_REG 0, rdi
+DECLARE_REG 1, rsi
+DECLARE_REG 2, rdx
+DECLARE_REG 3, rcx
+DECLARE_REG 4, R8
+DECLARE_REG 5, R9
+DECLARE_REG 6, rax, 8
+DECLARE_REG 7, R10, 16
+DECLARE_REG 8, R11, 24
+DECLARE_REG 9, rbx, 32
+DECLARE_REG 10, rbp, 40
+DECLARE_REG 11, R14, 48
+DECLARE_REG 12, R15, 56
+DECLARE_REG 13, R12, 64
+DECLARE_REG 14, R13, 72
+
+%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
+ %assign num_args %1
+ %assign regs_used %2
+ %assign xmm_regs_used %3
+ ASSERT regs_used >= num_args
+ SETUP_STACK_POINTER %4
+ ASSERT regs_used <= 15
+ PUSH_IF_USED 9, 10, 11, 12, 13, 14
+ ALLOC_STACK %4
+ LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
+ DEFINE_ARGS_INTERNAL %0, %4, %5
+%endmacro
+
+%define has_epilogue regs_used > 9 || stack_size > 0 || vzeroupper_required
+
+%macro RET 0
+ %if stack_size_padded > 0
+ %if required_stack_alignment > STACK_ALIGNMENT
+ mov rsp, rstkm
+ %else
+ add rsp, stack_size_padded
+ %endif
+ %endif
+ POP_IF_USED 14, 13, 12, 11, 10, 9
+ %if vzeroupper_required
+ vzeroupper
+ %endif
+ AUTO_REP_RET
+%endmacro
+
+%else ; X86_32 ;==============================================================
+
+DECLARE_REG 0, eax, 4
+DECLARE_REG 1, ecx, 8
+DECLARE_REG 2, edx, 12
+DECLARE_REG 3, ebx, 16
+DECLARE_REG 4, esi, 20
+DECLARE_REG 5, edi, 24
+DECLARE_REG 6, ebp, 28
+%define rsp esp
+
+%macro DECLARE_ARG 1-*
+ %rep %0
+ %define r%1m [rstk + stack_offset + 4*%1 + 4]
+ %define r%1mp dword r%1m
+ %rotate 1
+ %endrep
+%endmacro
+
+DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
+
+%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
+ %assign num_args %1
+ %assign regs_used %2
+ ASSERT regs_used >= num_args
+ %if num_args > 7
+ %assign num_args 7
+ %endif
+ %if regs_used > 7
+ %assign regs_used 7
+ %endif
+ SETUP_STACK_POINTER %4
+ ASSERT regs_used <= 7
+ PUSH_IF_USED 3, 4, 5, 6
+ ALLOC_STACK %4
+ LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
+ DEFINE_ARGS_INTERNAL %0, %4, %5
+%endmacro
+
+%define has_epilogue regs_used > 3 || stack_size > 0 || vzeroupper_required
+
+%macro RET 0
+ %if stack_size_padded > 0
+ %if required_stack_alignment > STACK_ALIGNMENT
+ mov rsp, rstkm
+ %else
+ add rsp, stack_size_padded
+ %endif
+ %endif
+ POP_IF_USED 6, 5, 4, 3
+ %if vzeroupper_required
+ vzeroupper
+ %endif
+ AUTO_REP_RET
+%endmacro
+
+%endif ;======================================================================
+
+%if WIN64 == 0
+ %macro WIN64_SPILL_XMM 1
+ %endmacro
+ %macro WIN64_RESTORE_XMM 0
+ %endmacro
+ %macro WIN64_PUSH_XMM 0
+ %endmacro
+%endif
+
+; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
+; a branch or a branch target. So switch to a 2-byte form of ret in that case.
+; We can automatically detect "follows a branch", but not a branch target.
+; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.)
+%macro REP_RET 0
+ %if has_epilogue || cpuflag(ssse3)
+ RET
+ %else
+ rep ret
+ %endif
+ annotate_function_size
+%endmacro
+
+%define last_branch_adr $$
+%macro AUTO_REP_RET 0
+ %if notcpuflag(ssse3)
+ times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr.
+ %endif
+ ret
+ annotate_function_size
+%endmacro
+
+%macro BRANCH_INSTR 0-*
+ %rep %0
+ %macro %1 1-2 %1
+ %2 %1
+ %if notcpuflag(ssse3)
+ %%branch_instr equ $
+ %xdefine last_branch_adr %%branch_instr
+ %endif
+ %endmacro
+ %rotate 1
+ %endrep
+%endmacro
+
+BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp
+
+%macro TAIL_CALL 1-2 1 ; callee, is_nonadjacent
+ %if has_epilogue
+ call %1
+ RET
+ %elif %2
+ jmp %1
+ %endif
+ annotate_function_size
+%endmacro
+
+;=============================================================================
+; arch-independent part
+;=============================================================================
+
+%assign function_align 16
+
+; Begin a function.
+; Applies any symbol mangling needed for C linkage, and sets up a define such that
+; subsequent uses of the function name automatically refer to the mangled version.
+; Appends cpuflags to the function name if cpuflags has been specified.
+; The "" empty default parameter is a workaround for nasm, which fails if SUFFIX
+; is empty and we call cglobal_internal with just %1 %+ SUFFIX (without %2).
+%macro cglobal 1-2+ "" ; name, [PROLOGUE args]
+ cglobal_internal 1, %1 %+ SUFFIX, %2
+%endmacro
+%macro cvisible 1-2+ "" ; name, [PROLOGUE args]
+ cglobal_internal 0, %1 %+ SUFFIX, %2
+%endmacro
+%macro cglobal_internal 2-3+
+ annotate_function_size
+ %ifndef cglobaled_%2
+ %if %1
+ %xdefine %2 mangle(private_prefix %+ _ %+ %2)
+ %else
+ %xdefine %2 mangle(public_prefix %+ _ %+ %2)
+ %endif
+ %xdefine %2.skip_prologue %2 %+ .skip_prologue
+ CAT_XDEFINE cglobaled_, %2, 1
+ %endif
+ %xdefine current_function %2
+ %xdefine current_function_section __SECT__
+ %if FORMAT_ELF
+ %if %1
+ global %2:function VISIBILITY
+ %else
+ global %2:function
+ %endif
+ %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN && %1
+ global %2:private_extern
+ %else
+ global %2
+ %endif
+ align function_align
+ %2:
+ RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer
+ %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
+ %assign stack_offset 0 ; stack pointer offset relative to the return address
+ %assign stack_size 0 ; amount of stack space that can be freely used inside a function
+ %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding
+ %assign xmm_regs_used 0 ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64 and vzeroupper
+ %ifnidn %3, ""
+ PROLOGUE %3
+ %endif
+%endmacro
+
+; Create a global symbol from a local label with the correct name mangling and type
+%macro cglobal_label 1
+ %if FORMAT_ELF
+ global current_function %+ %1:function VISIBILITY
+ %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN
+ global current_function %+ %1:private_extern
+ %else
+ global current_function %+ %1
+ %endif
+ %1:
+%endmacro
+
+%macro cextern 1
+ %xdefine %1 mangle(private_prefix %+ _ %+ %1)
+ CAT_XDEFINE cglobaled_, %1, 1
+ extern %1
+%endmacro
+
+; like cextern, but without the prefix
+%macro cextern_naked 1
+ %ifdef PREFIX
+ %xdefine %1 mangle(%1)
+ %endif
+ CAT_XDEFINE cglobaled_, %1, 1
+ extern %1
+%endmacro
+
+%macro const 1-2+
+ %xdefine %1 mangle(private_prefix %+ _ %+ %1)
+ %if FORMAT_ELF
+ global %1:data VISIBILITY
+ %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN
+ global %1:private_extern
+ %else
+ global %1
+ %endif
+ %1: %2
+%endmacro
+
+; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
+%if FORMAT_ELF
+ [SECTION .note.GNU-stack noalloc noexec nowrite progbits]
+%endif
+
+; Tell debuggers how large the function was.
+; This may be invoked multiple times per function; we rely on later instances overriding earlier ones.
+; This is invoked by RET and similar macros, and also cglobal does it for the previous function,
+; but if the last function in a source file doesn't use any of the standard macros for its epilogue,
+; then its size might be unspecified.
+%macro annotate_function_size 0
+ %ifdef __YASM_VER__
+ %ifdef current_function
+ %if FORMAT_ELF
+ current_function_section
+ %%ecf equ $
+ size current_function %%ecf - current_function
+ __SECT__
+ %endif
+ %endif
+ %endif
+%endmacro
+
+; cpuflags
+
+%assign cpuflags_mmx (1<<0)
+%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx
+%assign cpuflags_3dnow (1<<2) | cpuflags_mmx
+%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
+%assign cpuflags_sse (1<<4) | cpuflags_mmx2
+%assign cpuflags_sse2 (1<<5) | cpuflags_sse
+%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
+%assign cpuflags_lzcnt (1<<7) | cpuflags_sse2
+%assign cpuflags_sse3 (1<<8) | cpuflags_sse2
+%assign cpuflags_ssse3 (1<<9) | cpuflags_sse3
+%assign cpuflags_sse4 (1<<10)| cpuflags_ssse3
+%assign cpuflags_sse42 (1<<11)| cpuflags_sse4
+%assign cpuflags_aesni (1<<12)| cpuflags_sse42
+%assign cpuflags_gfni (1<<13)| cpuflags_sse42
+%assign cpuflags_avx (1<<14)| cpuflags_sse42
+%assign cpuflags_xop (1<<15)| cpuflags_avx
+%assign cpuflags_fma4 (1<<16)| cpuflags_avx
+%assign cpuflags_fma3 (1<<17)| cpuflags_avx
+%assign cpuflags_bmi1 (1<<18)| cpuflags_avx|cpuflags_lzcnt
+%assign cpuflags_bmi2 (1<<19)| cpuflags_bmi1
+%assign cpuflags_avx2 (1<<20)| cpuflags_fma3|cpuflags_bmi2
+%assign cpuflags_avx512 (1<<21)| cpuflags_avx2 ; F, CD, BW, DQ, VL
+
+%assign cpuflags_cache32 (1<<22)
+%assign cpuflags_cache64 (1<<23)
+%assign cpuflags_aligned (1<<24) ; not a cpu feature, but a function variant
+%assign cpuflags_atom (1<<25)
+
+; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
+%define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
+%define notcpuflag(x) (cpuflag(x) ^ 1)
+
+; Takes an arbitrary number of cpuflags from the above list.
+; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
+; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
+%macro INIT_CPUFLAGS 0-*
+ %xdefine SUFFIX
+ %undef cpuname
+ %assign cpuflags 0
+
+ %if %0 >= 1
+ %rep %0
+ %ifdef cpuname
+ %xdefine cpuname cpuname %+ _%1
+ %else
+ %xdefine cpuname %1
+ %endif
+ %assign cpuflags cpuflags | cpuflags_%1
+ %rotate 1
+ %endrep
+ %xdefine SUFFIX _ %+ cpuname
+
+ %if cpuflag(avx)
+ %assign avx_enabled 1
+ %endif
+ %if (mmsize == 16 && notcpuflag(sse2)) || (mmsize == 32 && notcpuflag(avx2))
+ %define mova movaps
+ %define movu movups
+ %define movnta movntps
+ %endif
+ %if cpuflag(aligned)
+ %define movu mova
+ %elif cpuflag(sse3) && notcpuflag(ssse3)
+ %define movu lddqu
+ %endif
+ %endif
+
+ %if VPX_ARCH_X86_64 || cpuflag(sse2)
+ %ifdef __NASM_VER__
+ ALIGNMODE p6
+ %else
+ CPU amdnop
+ %endif
+ %else
+ %ifdef __NASM_VER__
+ ALIGNMODE nop
+ %else
+ CPU basicnop
+ %endif
+ %endif
+%endmacro
+
+; Merge mmx, sse*, and avx*
+; m# is a simd register of the currently selected size
+; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
+; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
+; zm# is the corresponding zmm register if mmsize >= 64, otherwise the same as m#
+; (All 4 remain in sync through SWAP.)
+
+%macro CAT_XDEFINE 3
+ %xdefine %1%2 %3
+%endmacro
+
+%macro CAT_UNDEF 2
+ %undef %1%2
+%endmacro
+
+%macro DEFINE_MMREGS 1 ; mmtype
+ %assign %%prev_mmregs 0
+ %ifdef num_mmregs
+ %assign %%prev_mmregs num_mmregs
+ %endif
+
+ %assign num_mmregs 8
+ %if VPX_ARCH_X86_64 && mmsize >= 16
+ %assign num_mmregs 16
+ %if cpuflag(avx512) || mmsize == 64
+ %assign num_mmregs 32
+ %endif
+ %endif
+
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE m, %%i, %1 %+ %%i
+ CAT_XDEFINE nn%1, %%i, %%i
+ %assign %%i %%i+1
+ %endrep
+ %if %%prev_mmregs > num_mmregs
+ %rep %%prev_mmregs - num_mmregs
+ CAT_UNDEF m, %%i
+ CAT_UNDEF nn %+ mmtype, %%i
+ %assign %%i %%i+1
+ %endrep
+ %endif
+ %xdefine mmtype %1
+%endmacro
+
+; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
+%macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
+ %if VPX_ARCH_X86_64 && cpuflag(avx512)
+ %assign %%i %1
+ %rep 16-%1
+ %assign %%i_high %%i+16
+ SWAP %%i, %%i_high
+ %assign %%i %%i+1
+ %endrep
+ %endif
+%endmacro
+
+%macro INIT_MMX 0-1+
+ %assign avx_enabled 0
+ %define RESET_MM_PERMUTATION INIT_MMX %1
+ %define mmsize 8
+ %define mova movq
+ %define movu movq
+ %define movh movd
+ %define movnta movntq
+ INIT_CPUFLAGS %1
+ DEFINE_MMREGS mm
+%endmacro
+
+%macro INIT_XMM 0-1+
+ %assign avx_enabled 0
+ %define RESET_MM_PERMUTATION INIT_XMM %1
+ %define mmsize 16
+ %define mova movdqa
+ %define movu movdqu
+ %define movh movq
+ %define movnta movntdq
+ INIT_CPUFLAGS %1
+ DEFINE_MMREGS xmm
+ %if WIN64
+ AVX512_MM_PERMUTATION 6 ; Swap callee-saved registers with volatile registers
+ %endif
+%endmacro
+
+%macro INIT_YMM 0-1+
+ %assign avx_enabled 1
+ %define RESET_MM_PERMUTATION INIT_YMM %1
+ %define mmsize 32
+ %define mova movdqa
+ %define movu movdqu
+ %undef movh
+ %define movnta movntdq
+ INIT_CPUFLAGS %1
+ DEFINE_MMREGS ymm
+ AVX512_MM_PERMUTATION
+%endmacro
+
+%macro INIT_ZMM 0-1+
+ %assign avx_enabled 1
+ %define RESET_MM_PERMUTATION INIT_ZMM %1
+ %define mmsize 64
+ %define mova movdqa
+ %define movu movdqu
+ %undef movh
+ %define movnta movntdq
+ INIT_CPUFLAGS %1
+ DEFINE_MMREGS zmm
+ AVX512_MM_PERMUTATION
+%endmacro
+
+INIT_XMM
+
+%macro DECLARE_MMCAST 1
+ %define mmmm%1 mm%1
+ %define mmxmm%1 mm%1
+ %define mmymm%1 mm%1
+ %define mmzmm%1 mm%1
+ %define xmmmm%1 mm%1
+ %define xmmxmm%1 xmm%1
+ %define xmmymm%1 xmm%1
+ %define xmmzmm%1 xmm%1
+ %define ymmmm%1 mm%1
+ %define ymmxmm%1 xmm%1
+ %define ymmymm%1 ymm%1
+ %define ymmzmm%1 ymm%1
+ %define zmmmm%1 mm%1
+ %define zmmxmm%1 xmm%1
+ %define zmmymm%1 ymm%1
+ %define zmmzmm%1 zmm%1
+ %define xm%1 xmm %+ m%1
+ %define ym%1 ymm %+ m%1
+ %define zm%1 zmm %+ m%1
+%endmacro
+
+%assign i 0
+%rep 32
+ DECLARE_MMCAST i
+ %assign i i+1
+%endrep
+
+; I often want to use macros that permute their arguments. e.g. there's no
+; efficient way to implement butterfly or transpose or dct without swapping some
+; arguments.
+;
+; I would like to not have to manually keep track of the permutations:
+; If I insert a permutation in the middle of a function, it should automatically
+; change everything that follows. For more complex macros I may also have multiple
+; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
+;
+; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
+; permutes its arguments. It's equivalent to exchanging the contents of the
+; registers, except that this way you exchange the register names instead, so it
+; doesn't cost any cycles.
+
+%macro PERMUTE 2-* ; takes a list of pairs to swap
+ %rep %0/2
+ %xdefine %%tmp%2 m%2
+ %rotate 2
+ %endrep
+ %rep %0/2
+ %xdefine m%1 %%tmp%2
+ CAT_XDEFINE nn, m%1, %1
+ %rotate 2
+ %endrep
+%endmacro
+
+%macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs)
+ %ifnum %1 ; SWAP 0, 1, ...
+ SWAP_INTERNAL_NUM %1, %2
+ %else ; SWAP m0, m1, ...
+ SWAP_INTERNAL_NAME %1, %2
+ %endif
+%endmacro
+
+%macro SWAP_INTERNAL_NUM 2-*
+ %rep %0-1
+ %xdefine %%tmp m%1
+ %xdefine m%1 m%2
+ %xdefine m%2 %%tmp
+ CAT_XDEFINE nn, m%1, %1
+ CAT_XDEFINE nn, m%2, %2
+ %rotate 1
+ %endrep
+%endmacro
+
+%macro SWAP_INTERNAL_NAME 2-*
+ %xdefine %%args nn %+ %1
+ %rep %0-1
+ %xdefine %%args %%args, nn %+ %2
+ %rotate 1
+ %endrep
+ SWAP_INTERNAL_NUM %%args
+%endmacro
+
+; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later
+; calls to that function will automatically load the permutation, so values can
+; be returned in mmregs.
+%macro SAVE_MM_PERMUTATION 0-1
+ %if %0
+ %xdefine %%f %1_m
+ %else
+ %xdefine %%f current_function %+ _m
+ %endif
+ %assign %%i 0
+ %rep num_mmregs
+ %xdefine %%tmp m %+ %%i
+ CAT_XDEFINE %%f, %%i, regnumof %+ %%tmp
+ %assign %%i %%i+1
+ %endrep
+%endmacro
+
+%macro LOAD_MM_PERMUTATION 0-1 ; name to load from
+ %if %0
+ %xdefine %%f %1_m
+ %else
+ %xdefine %%f current_function %+ _m
+ %endif
+ %xdefine %%tmp %%f %+ 0
+ %ifnum %%tmp
+ RESET_MM_PERMUTATION
+ %assign %%i 0
+ %rep num_mmregs
+ %xdefine %%tmp %%f %+ %%i
+ CAT_XDEFINE %%m, %%i, m %+ %%tmp
+ %assign %%i %%i+1
+ %endrep
+ %rep num_mmregs
+ %assign %%i %%i-1
+ CAT_XDEFINE m, %%i, %%m %+ %%i
+ CAT_XDEFINE nn, m %+ %%i, %%i
+ %endrep
+ %endif
+%endmacro
+
+; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
+%macro call 1
+ %ifid %1
+ call_internal %1 %+ SUFFIX, %1
+ %else
+ call %1
+ %endif
+%endmacro
+%macro call_internal 2
+ %xdefine %%i %2
+ %ifndef cglobaled_%2
+ %ifdef cglobaled_%1
+ %xdefine %%i %1
+ %endif
+ %endif
+ call %%i
+ LOAD_MM_PERMUTATION %%i
+%endmacro
+
+; Substitutions that reduce instruction size but are functionally equivalent
+%macro add 2
+ %ifnum %2
+ %if %2==128
+ sub %1, -128
+ %else
+ add %1, %2
+ %endif
+ %else
+ add %1, %2
+ %endif
+%endmacro
+
+%macro sub 2
+ %ifnum %2
+ %if %2==128
+ add %1, -128
+ %else
+ sub %1, %2
+ %endif
+ %else
+ sub %1, %2
+ %endif
+%endmacro
+
+;=============================================================================
+; AVX abstraction layer
+;=============================================================================
+
+%assign i 0
+%rep 32
+ %if i < 8
+ CAT_XDEFINE sizeofmm, i, 8
+ CAT_XDEFINE regnumofmm, i, i
+ %endif
+ CAT_XDEFINE sizeofxmm, i, 16
+ CAT_XDEFINE sizeofymm, i, 32
+ CAT_XDEFINE sizeofzmm, i, 64
+ CAT_XDEFINE regnumofxmm, i, i
+ CAT_XDEFINE regnumofymm, i, i
+ CAT_XDEFINE regnumofzmm, i, i
+ %assign i i+1
+%endrep
+%undef i
+
+%macro CHECK_AVX_INSTR_EMU 3-*
+ %xdefine %%opcode %1
+ %xdefine %%dst %2
+ %rep %0-2
+ %ifidn %%dst, %3
+ %error non-avx emulation of ``%%opcode'' is not supported
+ %endif
+ %rotate 1
+ %endrep
+%endmacro
+
+;%1 == instruction
+;%2 == minimal instruction set
+;%3 == 1 if float, 0 if int
+;%4 == 1 if 4-operand emulation, 0 if 3-operand emulation, 255 otherwise (no emulation)
+;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
+;%6+: operands
+%macro RUN_AVX_INSTR 6-9+
+ %ifnum sizeof%7
+ %assign __sizeofreg sizeof%7
+ %elifnum sizeof%6
+ %assign __sizeofreg sizeof%6
+ %else
+ %assign __sizeofreg mmsize
+ %endif
+ %assign __emulate_avx 0
+ %if avx_enabled && __sizeofreg >= 16
+ %xdefine __instr v%1
+ %else
+ %xdefine __instr %1
+ %if %0 >= 8+%4
+ %assign __emulate_avx 1
+ %endif
+ %endif
+ %ifnidn %2, fnord
+ %ifdef cpuname
+ %if notcpuflag(%2)
+ %error use of ``%1'' %2 instruction in cpuname function: current_function
+ %elif %3 == 0 && __sizeofreg == 16 && notcpuflag(sse2)
+ %error use of ``%1'' sse2 instruction in cpuname function: current_function
+ %elif %3 == 0 && __sizeofreg == 32 && notcpuflag(avx2)
+ %error use of ``%1'' avx2 instruction in cpuname function: current_function
+ %elif __sizeofreg == 16 && notcpuflag(sse)
+ %error use of ``%1'' sse instruction in cpuname function: current_function
+ %elif __sizeofreg == 32 && notcpuflag(avx)
+ %error use of ``%1'' avx instruction in cpuname function: current_function
+ %elif __sizeofreg == 64 && notcpuflag(avx512)
+ %error use of ``%1'' avx512 instruction in cpuname function: current_function
+ %elifidn %1, pextrw ; special case because the base instruction is mmx2,
+ %ifnid %6 ; but sse4 is required for memory operands
+ %if notcpuflag(sse4)
+ %error use of ``%1'' sse4 instruction in cpuname function: current_function
+ %endif
+ %endif
+ %endif
+ %endif
+ %endif
+
+ %if __emulate_avx
+ %xdefine __src1 %7
+ %xdefine __src2 %8
+ %if %5 && %4 == 0
+ %ifnidn %6, %7
+ %ifidn %6, %8
+ %xdefine __src1 %8
+ %xdefine __src2 %7
+ %elifnnum sizeof%8
+ ; 3-operand AVX instructions with a memory arg can only have it in src2,
+ ; whereas SSE emulation prefers to have it in src1 (i.e. the mov).
+ ; So, if the instruction is commutative with a memory arg, swap them.
+ %xdefine __src1 %8
+ %xdefine __src2 %7
+ %endif
+ %endif
+ %endif
+ %ifnidn %6, __src1
+ %if %0 >= 9
+ CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, __src2, %9
+ %else
+ CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, __src2
+ %endif
+ %if __sizeofreg == 8
+ MOVQ %6, __src1
+ %elif %3
+ MOVAPS %6, __src1
+ %else
+ MOVDQA %6, __src1
+ %endif
+ %endif
+ %if %0 >= 9
+ %1 %6, __src2, %9
+ %else
+ %1 %6, __src2
+ %endif
+ %elif %0 >= 9
+ __instr %6, %7, %8, %9
+ %elif %0 == 8
+ %if avx_enabled && %5
+ %xdefine __src1 %7
+ %xdefine __src2 %8
+ %ifnum regnumof%7
+ %ifnum regnumof%8
+ %if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32
+ ; Most VEX-encoded instructions require an additional byte to encode when
+ ; src2 is a high register (e.g. m8..15). If the instruction is commutative
+ ; we can swap src1 and src2 when doing so reduces the instruction length.
+ %xdefine __src1 %8
+ %xdefine __src2 %7
+ %endif
+ %endif
+ %endif
+ __instr %6, __src1, __src2
+ %else
+ __instr %6, %7, %8
+ %endif
+ %elif %0 == 7
+ %if avx_enabled && %5
+ %xdefine __src1 %6
+ %xdefine __src2 %7
+ %ifnum regnumof%6
+ %ifnum regnumof%7
+ %if regnumof%6 < 8 && regnumof%7 >= 8 && regnumof%7 < 16 && sizeof%7 <= 32
+ %xdefine __src1 %7
+ %xdefine __src2 %6
+ %endif
+ %endif
+ %endif
+ __instr %6, __src1, __src2
+ %else
+ __instr %6, %7
+ %endif
+ %else
+ __instr %6
+ %endif
+%endmacro
+
+;%1 == instruction
+;%2 == minimal instruction set
+;%3 == 1 if float, 0 if int
+;%4 == 1 if 4-operand emulation, 0 if 3-operand emulation, 255 otherwise (no emulation)
+;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
+%macro AVX_INSTR 1-5 fnord, 0, 255, 0
+ %macro %1 1-10 fnord, fnord, fnord, fnord, %1, %2, %3, %4, %5
+ %ifidn %2, fnord
+ RUN_AVX_INSTR %6, %7, %8, %9, %10, %1
+ %elifidn %3, fnord
+ RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2
+ %elifidn %4, fnord
+ RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3
+ %elifidn %5, fnord
+ RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4
+ %else
+ RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4, %5
+ %endif
+ %endmacro
+%endmacro
+
+; Instructions with both VEX/EVEX and legacy encodings
+; Non-destructive instructions are written without parameters
+AVX_INSTR addpd, sse2, 1, 0, 1
+AVX_INSTR addps, sse, 1, 0, 1
+AVX_INSTR addsd, sse2, 1, 0, 0
+AVX_INSTR addss, sse, 1, 0, 0
+AVX_INSTR addsubpd, sse3, 1, 0, 0
+AVX_INSTR addsubps, sse3, 1, 0, 0
+AVX_INSTR aesdec, aesni, 0, 0, 0
+AVX_INSTR aesdeclast, aesni, 0, 0, 0
+AVX_INSTR aesenc, aesni, 0, 0, 0
+AVX_INSTR aesenclast, aesni, 0, 0, 0
+AVX_INSTR aesimc, aesni
+AVX_INSTR aeskeygenassist, aesni
+AVX_INSTR andnpd, sse2, 1, 0, 0
+AVX_INSTR andnps, sse, 1, 0, 0
+AVX_INSTR andpd, sse2, 1, 0, 1
+AVX_INSTR andps, sse, 1, 0, 1
+AVX_INSTR blendpd, sse4, 1, 1, 0
+AVX_INSTR blendps, sse4, 1, 1, 0
+AVX_INSTR blendvpd, sse4 ; can't be emulated
+AVX_INSTR blendvps, sse4 ; can't be emulated
+AVX_INSTR cmpeqpd, sse2, 1, 0, 1
+AVX_INSTR cmpeqps, sse, 1, 0, 1
+AVX_INSTR cmpeqsd, sse2, 1, 0, 0
+AVX_INSTR cmpeqss, sse, 1, 0, 0
+AVX_INSTR cmplepd, sse2, 1, 0, 0
+AVX_INSTR cmpleps, sse, 1, 0, 0
+AVX_INSTR cmplesd, sse2, 1, 0, 0
+AVX_INSTR cmpless, sse, 1, 0, 0
+AVX_INSTR cmpltpd, sse2, 1, 0, 0
+AVX_INSTR cmpltps, sse, 1, 0, 0
+AVX_INSTR cmpltsd, sse2, 1, 0, 0
+AVX_INSTR cmpltss, sse, 1, 0, 0
+AVX_INSTR cmpneqpd, sse2, 1, 0, 1
+AVX_INSTR cmpneqps, sse, 1, 0, 1
+AVX_INSTR cmpneqsd, sse2, 1, 0, 0
+AVX_INSTR cmpneqss, sse, 1, 0, 0
+AVX_INSTR cmpnlepd, sse2, 1, 0, 0
+AVX_INSTR cmpnleps, sse, 1, 0, 0
+AVX_INSTR cmpnlesd, sse2, 1, 0, 0
+AVX_INSTR cmpnless, sse, 1, 0, 0
+AVX_INSTR cmpnltpd, sse2, 1, 0, 0
+AVX_INSTR cmpnltps, sse, 1, 0, 0
+AVX_INSTR cmpnltsd, sse2, 1, 0, 0
+AVX_INSTR cmpnltss, sse, 1, 0, 0
+AVX_INSTR cmpordpd, sse2 1, 0, 1
+AVX_INSTR cmpordps, sse 1, 0, 1
+AVX_INSTR cmpordsd, sse2 1, 0, 0
+AVX_INSTR cmpordss, sse 1, 0, 0
+AVX_INSTR cmppd, sse2, 1, 1, 0
+AVX_INSTR cmpps, sse, 1, 1, 0
+AVX_INSTR cmpsd, sse2, 1, 1, 0
+AVX_INSTR cmpss, sse, 1, 1, 0
+AVX_INSTR cmpunordpd, sse2, 1, 0, 1
+AVX_INSTR cmpunordps, sse, 1, 0, 1
+AVX_INSTR cmpunordsd, sse2, 1, 0, 0
+AVX_INSTR cmpunordss, sse, 1, 0, 0
+AVX_INSTR comisd, sse2, 1
+AVX_INSTR comiss, sse, 1
+AVX_INSTR cvtdq2pd, sse2, 1
+AVX_INSTR cvtdq2ps, sse2, 1
+AVX_INSTR cvtpd2dq, sse2, 1
+AVX_INSTR cvtpd2ps, sse2, 1
+AVX_INSTR cvtps2dq, sse2, 1
+AVX_INSTR cvtps2pd, sse2, 1
+AVX_INSTR cvtsd2si, sse2, 1
+AVX_INSTR cvtsd2ss, sse2, 1, 0, 0
+AVX_INSTR cvtsi2sd, sse2, 1, 0, 0
+AVX_INSTR cvtsi2ss, sse, 1, 0, 0
+AVX_INSTR cvtss2sd, sse2, 1, 0, 0
+AVX_INSTR cvtss2si, sse, 1
+AVX_INSTR cvttpd2dq, sse2, 1
+AVX_INSTR cvttps2dq, sse2, 1
+AVX_INSTR cvttsd2si, sse2, 1
+AVX_INSTR cvttss2si, sse, 1
+AVX_INSTR divpd, sse2, 1, 0, 0
+AVX_INSTR divps, sse, 1, 0, 0
+AVX_INSTR divsd, sse2, 1, 0, 0
+AVX_INSTR divss, sse, 1, 0, 0
+AVX_INSTR dppd, sse4, 1, 1, 0
+AVX_INSTR dpps, sse4, 1, 1, 0
+AVX_INSTR extractps, sse4, 1
+AVX_INSTR gf2p8affineinvqb, gfni, 0, 1, 0
+AVX_INSTR gf2p8affineqb, gfni, 0, 1, 0
+AVX_INSTR gf2p8mulb, gfni, 0, 0, 0
+AVX_INSTR haddpd, sse3, 1, 0, 0
+AVX_INSTR haddps, sse3, 1, 0, 0
+AVX_INSTR hsubpd, sse3, 1, 0, 0
+AVX_INSTR hsubps, sse3, 1, 0, 0
+AVX_INSTR insertps, sse4, 1, 1, 0
+AVX_INSTR lddqu, sse3
+AVX_INSTR ldmxcsr, sse, 1
+AVX_INSTR maskmovdqu, sse2
+AVX_INSTR maxpd, sse2, 1, 0, 1
+AVX_INSTR maxps, sse, 1, 0, 1
+AVX_INSTR maxsd, sse2, 1, 0, 0
+AVX_INSTR maxss, sse, 1, 0, 0
+AVX_INSTR minpd, sse2, 1, 0, 1
+AVX_INSTR minps, sse, 1, 0, 1
+AVX_INSTR minsd, sse2, 1, 0, 0
+AVX_INSTR minss, sse, 1, 0, 0
+AVX_INSTR movapd, sse2, 1
+AVX_INSTR movaps, sse, 1
+AVX_INSTR movd, mmx
+AVX_INSTR movddup, sse3, 1
+AVX_INSTR movdqa, sse2
+AVX_INSTR movdqu, sse2
+AVX_INSTR movhlps, sse, 1, 0, 0
+AVX_INSTR movhpd, sse2, 1, 0, 0
+AVX_INSTR movhps, sse, 1, 0, 0
+AVX_INSTR movlhps, sse, 1, 0, 0
+AVX_INSTR movlpd, sse2, 1, 0, 0
+AVX_INSTR movlps, sse, 1, 0, 0
+AVX_INSTR movmskpd, sse2, 1
+AVX_INSTR movmskps, sse, 1
+AVX_INSTR movntdq, sse2
+AVX_INSTR movntdqa, sse4
+AVX_INSTR movntpd, sse2, 1
+AVX_INSTR movntps, sse, 1
+AVX_INSTR movq, mmx
+AVX_INSTR movsd, sse2, 1, 0, 0
+AVX_INSTR movshdup, sse3, 1
+AVX_INSTR movsldup, sse3, 1
+AVX_INSTR movss, sse, 1, 0, 0
+AVX_INSTR movupd, sse2, 1
+AVX_INSTR movups, sse, 1
+AVX_INSTR mpsadbw, sse4, 0, 1, 0
+AVX_INSTR mulpd, sse2, 1, 0, 1
+AVX_INSTR mulps, sse, 1, 0, 1
+AVX_INSTR mulsd, sse2, 1, 0, 0
+AVX_INSTR mulss, sse, 1, 0, 0
+AVX_INSTR orpd, sse2, 1, 0, 1
+AVX_INSTR orps, sse, 1, 0, 1
+AVX_INSTR pabsb, ssse3
+AVX_INSTR pabsd, ssse3
+AVX_INSTR pabsw, ssse3
+AVX_INSTR packsswb, mmx, 0, 0, 0
+AVX_INSTR packssdw, mmx, 0, 0, 0
+AVX_INSTR packuswb, mmx, 0, 0, 0
+AVX_INSTR packusdw, sse4, 0, 0, 0
+AVX_INSTR paddb, mmx, 0, 0, 1
+AVX_INSTR paddw, mmx, 0, 0, 1
+AVX_INSTR paddd, mmx, 0, 0, 1
+AVX_INSTR paddq, sse2, 0, 0, 1
+AVX_INSTR paddsb, mmx, 0, 0, 1
+AVX_INSTR paddsw, mmx, 0, 0, 1
+AVX_INSTR paddusb, mmx, 0, 0, 1
+AVX_INSTR paddusw, mmx, 0, 0, 1
+AVX_INSTR palignr, ssse3, 0, 1, 0
+AVX_INSTR pand, mmx, 0, 0, 1
+AVX_INSTR pandn, mmx, 0, 0, 0
+AVX_INSTR pavgb, mmx2, 0, 0, 1
+AVX_INSTR pavgw, mmx2, 0, 0, 1
+AVX_INSTR pblendvb, sse4 ; can't be emulated
+AVX_INSTR pblendw, sse4, 0, 1, 0
+AVX_INSTR pclmulqdq, fnord, 0, 1, 0
+AVX_INSTR pclmulhqhqdq, fnord, 0, 0, 0
+AVX_INSTR pclmulhqlqdq, fnord, 0, 0, 0
+AVX_INSTR pclmullqhqdq, fnord, 0, 0, 0
+AVX_INSTR pclmullqlqdq, fnord, 0, 0, 0
+AVX_INSTR pcmpestri, sse42
+AVX_INSTR pcmpestrm, sse42
+AVX_INSTR pcmpistri, sse42
+AVX_INSTR pcmpistrm, sse42
+AVX_INSTR pcmpeqb, mmx, 0, 0, 1
+AVX_INSTR pcmpeqw, mmx, 0, 0, 1
+AVX_INSTR pcmpeqd, mmx, 0, 0, 1
+AVX_INSTR pcmpeqq, sse4, 0, 0, 1
+AVX_INSTR pcmpgtb, mmx, 0, 0, 0
+AVX_INSTR pcmpgtw, mmx, 0, 0, 0
+AVX_INSTR pcmpgtd, mmx, 0, 0, 0
+AVX_INSTR pcmpgtq, sse42, 0, 0, 0
+AVX_INSTR pextrb, sse4
+AVX_INSTR pextrd, sse4
+AVX_INSTR pextrq, sse4
+AVX_INSTR pextrw, mmx2
+AVX_INSTR phaddw, ssse3, 0, 0, 0
+AVX_INSTR phaddd, ssse3, 0, 0, 0
+AVX_INSTR phaddsw, ssse3, 0, 0, 0
+AVX_INSTR phminposuw, sse4
+AVX_INSTR phsubw, ssse3, 0, 0, 0
+AVX_INSTR phsubd, ssse3, 0, 0, 0
+AVX_INSTR phsubsw, ssse3, 0, 0, 0
+AVX_INSTR pinsrb, sse4, 0, 1, 0
+AVX_INSTR pinsrd, sse4, 0, 1, 0
+AVX_INSTR pinsrq, sse4, 0, 1, 0
+AVX_INSTR pinsrw, mmx2, 0, 1, 0
+AVX_INSTR pmaddwd, mmx, 0, 0, 1
+AVX_INSTR pmaddubsw, ssse3, 0, 0, 0
+AVX_INSTR pmaxsb, sse4, 0, 0, 1
+AVX_INSTR pmaxsw, mmx2, 0, 0, 1
+AVX_INSTR pmaxsd, sse4, 0, 0, 1
+AVX_INSTR pmaxub, mmx2, 0, 0, 1
+AVX_INSTR pmaxuw, sse4, 0, 0, 1
+AVX_INSTR pmaxud, sse4, 0, 0, 1
+AVX_INSTR pminsb, sse4, 0, 0, 1
+AVX_INSTR pminsw, mmx2, 0, 0, 1
+AVX_INSTR pminsd, sse4, 0, 0, 1
+AVX_INSTR pminub, mmx2, 0, 0, 1
+AVX_INSTR pminuw, sse4, 0, 0, 1
+AVX_INSTR pminud, sse4, 0, 0, 1
+AVX_INSTR pmovmskb, mmx2
+AVX_INSTR pmovsxbw, sse4
+AVX_INSTR pmovsxbd, sse4
+AVX_INSTR pmovsxbq, sse4
+AVX_INSTR pmovsxwd, sse4
+AVX_INSTR pmovsxwq, sse4
+AVX_INSTR pmovsxdq, sse4
+AVX_INSTR pmovzxbw, sse4
+AVX_INSTR pmovzxbd, sse4
+AVX_INSTR pmovzxbq, sse4
+AVX_INSTR pmovzxwd, sse4
+AVX_INSTR pmovzxwq, sse4
+AVX_INSTR pmovzxdq, sse4
+AVX_INSTR pmuldq, sse4, 0, 0, 1
+AVX_INSTR pmulhrsw, ssse3, 0, 0, 1
+AVX_INSTR pmulhuw, mmx2, 0, 0, 1
+AVX_INSTR pmulhw, mmx, 0, 0, 1
+AVX_INSTR pmullw, mmx, 0, 0, 1
+AVX_INSTR pmulld, sse4, 0, 0, 1
+AVX_INSTR pmuludq, sse2, 0, 0, 1
+AVX_INSTR por, mmx, 0, 0, 1
+AVX_INSTR psadbw, mmx2, 0, 0, 1
+AVX_INSTR pshufb, ssse3, 0, 0, 0
+AVX_INSTR pshufd, sse2
+AVX_INSTR pshufhw, sse2
+AVX_INSTR pshuflw, sse2
+AVX_INSTR psignb, ssse3, 0, 0, 0
+AVX_INSTR psignw, ssse3, 0, 0, 0
+AVX_INSTR psignd, ssse3, 0, 0, 0
+AVX_INSTR psllw, mmx, 0, 0, 0
+AVX_INSTR pslld, mmx, 0, 0, 0
+AVX_INSTR psllq, mmx, 0, 0, 0
+AVX_INSTR pslldq, sse2, 0, 0, 0
+AVX_INSTR psraw, mmx, 0, 0, 0
+AVX_INSTR psrad, mmx, 0, 0, 0
+AVX_INSTR psrlw, mmx, 0, 0, 0
+AVX_INSTR psrld, mmx, 0, 0, 0
+AVX_INSTR psrlq, mmx, 0, 0, 0
+AVX_INSTR psrldq, sse2, 0, 0, 0
+AVX_INSTR psubb, mmx, 0, 0, 0
+AVX_INSTR psubw, mmx, 0, 0, 0
+AVX_INSTR psubd, mmx, 0, 0, 0
+AVX_INSTR psubq, sse2, 0, 0, 0
+AVX_INSTR psubsb, mmx, 0, 0, 0
+AVX_INSTR psubsw, mmx, 0, 0, 0
+AVX_INSTR psubusb, mmx, 0, 0, 0
+AVX_INSTR psubusw, mmx, 0, 0, 0
+AVX_INSTR ptest, sse4
+AVX_INSTR punpckhbw, mmx, 0, 0, 0
+AVX_INSTR punpckhwd, mmx, 0, 0, 0
+AVX_INSTR punpckhdq, mmx, 0, 0, 0
+AVX_INSTR punpckhqdq, sse2, 0, 0, 0
+AVX_INSTR punpcklbw, mmx, 0, 0, 0
+AVX_INSTR punpcklwd, mmx, 0, 0, 0
+AVX_INSTR punpckldq, mmx, 0, 0, 0
+AVX_INSTR punpcklqdq, sse2, 0, 0, 0
+AVX_INSTR pxor, mmx, 0, 0, 1
+AVX_INSTR rcpps, sse, 1
+AVX_INSTR rcpss, sse, 1, 0, 0
+AVX_INSTR roundpd, sse4, 1
+AVX_INSTR roundps, sse4, 1
+AVX_INSTR roundsd, sse4, 1, 1, 0
+AVX_INSTR roundss, sse4, 1, 1, 0
+AVX_INSTR rsqrtps, sse, 1
+AVX_INSTR rsqrtss, sse, 1, 0, 0
+AVX_INSTR shufpd, sse2, 1, 1, 0
+AVX_INSTR shufps, sse, 1, 1, 0
+AVX_INSTR sqrtpd, sse2, 1
+AVX_INSTR sqrtps, sse, 1
+AVX_INSTR sqrtsd, sse2, 1, 0, 0
+AVX_INSTR sqrtss, sse, 1, 0, 0
+AVX_INSTR stmxcsr, sse, 1
+AVX_INSTR subpd, sse2, 1, 0, 0
+AVX_INSTR subps, sse, 1, 0, 0
+AVX_INSTR subsd, sse2, 1, 0, 0
+AVX_INSTR subss, sse, 1, 0, 0
+AVX_INSTR ucomisd, sse2, 1
+AVX_INSTR ucomiss, sse, 1
+AVX_INSTR unpckhpd, sse2, 1, 0, 0
+AVX_INSTR unpckhps, sse, 1, 0, 0
+AVX_INSTR unpcklpd, sse2, 1, 0, 0
+AVX_INSTR unpcklps, sse, 1, 0, 0
+AVX_INSTR xorpd, sse2, 1, 0, 1
+AVX_INSTR xorps, sse, 1, 0, 1
+
+; 3DNow instructions, for sharing code between AVX, SSE and 3DN
+AVX_INSTR pfadd, 3dnow, 1, 0, 1
+AVX_INSTR pfsub, 3dnow, 1, 0, 0
+AVX_INSTR pfmul, 3dnow, 1, 0, 1
+
+;%1 == instruction
+;%2 == minimal instruction set
+%macro GPR_INSTR 2
+ %macro %1 2-5 fnord, %1, %2
+ %ifdef cpuname
+ %if notcpuflag(%5)
+ %error use of ``%4'' %5 instruction in cpuname function: current_function
+ %endif
+ %endif
+ %ifidn %3, fnord
+ %4 %1, %2
+ %else
+ %4 %1, %2, %3
+ %endif
+ %endmacro
+%endmacro
+
+GPR_INSTR andn, bmi1
+GPR_INSTR bextr, bmi1
+GPR_INSTR blsi, bmi1
+GPR_INSTR blsr, bmi1
+GPR_INSTR blsmsk, bmi1
+GPR_INSTR bzhi, bmi2
+GPR_INSTR mulx, bmi2
+GPR_INSTR pdep, bmi2
+GPR_INSTR pext, bmi2
+GPR_INSTR popcnt, sse42
+GPR_INSTR rorx, bmi2
+GPR_INSTR sarx, bmi2
+GPR_INSTR shlx, bmi2
+GPR_INSTR shrx, bmi2
+
+; base-4 constants for shuffles
+%assign i 0
+%rep 256
+ %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3)
+ %if j < 10
+ CAT_XDEFINE q000, j, i
+ %elif j < 100
+ CAT_XDEFINE q00, j, i
+ %elif j < 1000
+ CAT_XDEFINE q0, j, i
+ %else
+ CAT_XDEFINE q, j, i
+ %endif
+ %assign i i+1
+%endrep
+%undef i
+%undef j
+
+%macro FMA_INSTR 3
+ %macro %1 4-7 %1, %2, %3
+ %if cpuflag(xop)
+ v%5 %1, %2, %3, %4
+ %elifnidn %1, %4
+ %6 %1, %2, %3
+ %7 %1, %4
+ %else
+ %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
+ %endif
+ %endmacro
+%endmacro
+
+FMA_INSTR pmacsww, pmullw, paddw
+FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation
+FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation
+FMA_INSTR pmadcswd, pmaddwd, paddd
+
+; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax.
+; FMA3 is only possible if dst is the same as one of the src registers.
+; Either src2 or src3 can be a memory operand.
+%macro FMA4_INSTR 2-*
+ %push fma4_instr
+ %xdefine %$prefix %1
+ %rep %0 - 1
+ %macro %$prefix%2 4-6 %$prefix, %2
+ %if notcpuflag(fma3) && notcpuflag(fma4)
+ %error use of ``%5%6'' fma instruction in cpuname function: current_function
+ %elif cpuflag(fma4)
+ v%5%6 %1, %2, %3, %4
+ %elifidn %1, %2
+ ; If %3 or %4 is a memory operand it needs to be encoded as the last operand.
+ %ifnum sizeof%3
+ v%{5}213%6 %2, %3, %4
+ %else
+ v%{5}132%6 %2, %4, %3
+ %endif
+ %elifidn %1, %3
+ v%{5}213%6 %3, %2, %4
+ %elifidn %1, %4
+ v%{5}231%6 %4, %2, %3
+ %else
+ %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported
+ %endif
+ %endmacro
+ %rotate 1
+ %endrep
+ %pop
+%endmacro
+
+FMA4_INSTR fmadd, pd, ps, sd, ss
+FMA4_INSTR fmaddsub, pd, ps
+FMA4_INSTR fmsub, pd, ps, sd, ss
+FMA4_INSTR fmsubadd, pd, ps
+FMA4_INSTR fnmadd, pd, ps, sd, ss
+FMA4_INSTR fnmsub, pd, ps, sd, ss
+
+; Macros for converting VEX instructions to equivalent EVEX ones.
+%macro EVEX_INSTR 2-3 0 ; vex, evex, prefer_evex
+ %macro %1 2-7 fnord, fnord, %1, %2, %3
+ %ifidn %3, fnord
+ %define %%args %1, %2
+ %elifidn %4, fnord
+ %define %%args %1, %2, %3
+ %else
+ %define %%args %1, %2, %3, %4
+ %endif
+ %assign %%evex_required cpuflag(avx512) & %7
+ %ifnum regnumof%1
+ %if regnumof%1 >= 16 || sizeof%1 > 32
+ %assign %%evex_required 1
+ %endif
+ %endif
+ %ifnum regnumof%2
+ %if regnumof%2 >= 16 || sizeof%2 > 32
+ %assign %%evex_required 1
+ %endif
+ %endif
+ %ifnum regnumof%3
+ %if regnumof%3 >= 16 || sizeof%3 > 32
+ %assign %%evex_required 1
+ %endif
+ %endif
+ %if %%evex_required
+ %6 %%args
+ %else
+ %5 %%args ; Prefer VEX over EVEX due to shorter instruction length
+ %endif
+ %endmacro
+%endmacro
+
+EVEX_INSTR vbroadcastf128, vbroadcastf32x4
+EVEX_INSTR vbroadcasti128, vbroadcasti32x4
+EVEX_INSTR vextractf128, vextractf32x4
+EVEX_INSTR vextracti128, vextracti32x4
+EVEX_INSTR vinsertf128, vinsertf32x4
+EVEX_INSTR vinserti128, vinserti32x4
+EVEX_INSTR vmovdqa, vmovdqa32
+EVEX_INSTR vmovdqu, vmovdqu32
+EVEX_INSTR vpand, vpandd
+EVEX_INSTR vpandn, vpandnd
+EVEX_INSTR vpor, vpord
+EVEX_INSTR vpxor, vpxord
+EVEX_INSTR vrcpps, vrcp14ps, 1 ; EVEX versions have higher precision
+EVEX_INSTR vrcpss, vrcp14ss, 1
+EVEX_INSTR vrsqrtps, vrsqrt14ps, 1
+EVEX_INSTR vrsqrtss, vrsqrt14ss, 1