diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /tools/fuzzing | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
103 files changed, 16046 insertions, 0 deletions
diff --git a/tools/fuzzing/common/FuzzingMutate.cpp b/tools/fuzzing/common/FuzzingMutate.cpp new file mode 100644 index 0000000000..bb5e930125 --- /dev/null +++ b/tools/fuzzing/common/FuzzingMutate.cpp @@ -0,0 +1,32 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FuzzingMutate.h" +#include "FuzzingTraits.h" + +namespace mozilla { +namespace fuzzing { + +/** + * Randomly mutates a byte inside |aData| by using bit manipulation. + */ +/* static */ +void FuzzingMutate::ChangeBit(uint8_t* aData, size_t aLength) { + size_t offset = RandomIntegerRange<size_t>(0, aLength); + aData[offset] ^= (1 << FuzzingTraits::Random(9)); +} + +/** + * Randomly replaces a byte inside |aData| with one in the range of [0, 255]. + */ +/* static */ +void FuzzingMutate::ChangeByte(uint8_t* aData, size_t aLength) { + size_t offset = RandomIntegerRange<size_t>(0, aLength); + aData[offset] = RandomInteger<unsigned char>(); +} + +} // namespace fuzzing +} // namespace mozilla diff --git a/tools/fuzzing/common/FuzzingMutate.h b/tools/fuzzing/common/FuzzingMutate.h new file mode 100644 index 0000000000..f24f557669 --- /dev/null +++ b/tools/fuzzing/common/FuzzingMutate.h @@ -0,0 +1,24 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_fuzzing_FuzzingMutate_h +#define mozilla_fuzzing_FuzzingMutate_h + +#include <random> + +namespace mozilla { +namespace fuzzing { + +class FuzzingMutate { + public: + static void ChangeBit(uint8_t* aData, size_t aLength); + static void ChangeByte(uint8_t* aData, size_t aLength); +}; + +} // namespace fuzzing +} // namespace mozilla + +#endif diff --git a/tools/fuzzing/common/FuzzingTraits.cpp b/tools/fuzzing/common/FuzzingTraits.cpp new file mode 100644 index 0000000000..9e6ba3ac1d --- /dev/null +++ b/tools/fuzzing/common/FuzzingTraits.cpp @@ -0,0 +1,41 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <prinrval.h> +#include <thread> +#include <mutex> +#include "FuzzingTraits.h" + +namespace mozilla { +namespace fuzzing { + +/* static */ +unsigned int FuzzingTraits::Random(unsigned int aMax) { + MOZ_ASSERT(aMax > 0, "aMax needs to be bigger than 0"); + std::uniform_int_distribution<unsigned int> d(0, aMax); + return d(Rng()); +} + +/* static */ +bool FuzzingTraits::Sometimes(unsigned int aProbability) { + return FuzzingTraits::Random(aProbability) == 0; +} + +/* static */ +size_t FuzzingTraits::Frequency(const size_t aSize, const uint64_t aFactor) { + return RandomIntegerRange<size_t>(0, ceil(float(aSize) / aFactor)) + 1; +} + +/* static */ +std::mt19937_64& FuzzingTraits::Rng() { + static std::mt19937_64 rng; + static std::once_flag flag; + std::call_once(flag, [&] { rng.seed(PR_IntervalNow()); }); + return rng; +} + +} // namespace fuzzing +} // namespace mozilla diff --git a/tools/fuzzing/common/FuzzingTraits.h b/tools/fuzzing/common/FuzzingTraits.h new file mode 100644 index 0000000000..b8c9d76ab7 --- /dev/null +++ b/tools/fuzzing/common/FuzzingTraits.h @@ -0,0 +1,117 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_fuzzing_FuzzingTraits_h +#define mozilla_fuzzing_FuzzingTraits_h + +#include "mozilla/Assertions.h" +#include <cmath> +#include <random> +#include <type_traits> + +namespace mozilla { +namespace fuzzing { + +class FuzzingTraits { + public: + static unsigned int Random(unsigned int aMax); + static bool Sometimes(unsigned int aProbability); + /** + * Frequency() defines how many mutations of a kind shall be applied to a + * target buffer by using a user definable factor. The higher the factor, + * the less mutations are being made. + */ + static size_t Frequency(const size_t aSize, const uint64_t aFactor); + + static std::mt19937_64& Rng(); +}; + +/** + * RandomNumericLimit returns either the min or max limit of an arithmetic + * data type. + */ +template <typename T> +T RandomNumericLimit() { + static_assert(std::is_arithmetic_v<T> == true, + "T must be an arithmetic type"); + return FuzzingTraits::Sometimes(2) ? std::numeric_limits<T>::min() + : std::numeric_limits<T>::max(); +} + +/** + * RandomInteger generates negative and positive integers in 2**n increments. + */ +template <typename T> +T RandomInteger() { + static_assert(std::is_integral_v<T> == true, "T must be an integral type"); + double r = + static_cast<double>(FuzzingTraits::Random((sizeof(T) * CHAR_BIT) + 1)); + T x = static_cast<T>(pow(2.0, r)) - 1; + if (std::numeric_limits<T>::is_signed && FuzzingTraits::Sometimes(2)) { + return (x * -1) - 1; + } + return x; +} + +/** + * RandomIntegerRange returns a random integral within a [min, max] range. + */ +template <typename T> +T RandomIntegerRange(T min, T max) { + static_assert(std::is_integral_v<T> == true, "T must be an integral type"); + MOZ_ASSERT(min < max); + std::uniform_int_distribution<T> d(min, max); + return d(FuzzingTraits::Rng()); +} +/** + * uniform_int_distribution is undefined for char/uchar. Need to handle them + * separately. + */ +template <> +inline unsigned char RandomIntegerRange(unsigned char min, unsigned char max) { + MOZ_ASSERT(min < max); + std::uniform_int_distribution<unsigned short> d(min, max); + return static_cast<unsigned char>(d(FuzzingTraits::Rng())); +} +template <> +inline char RandomIntegerRange(char min, char max) { + MOZ_ASSERT(min < max); + std::uniform_int_distribution<short> d(min, max); + return static_cast<char>(d(FuzzingTraits::Rng())); +} + +/** + * RandomFloatingPointRange returns a random floating-point number within a + * [min, max] range. + */ +template <typename T> +T RandomFloatingPointRange(T min, T max) { + static_assert(std::is_floating_point_v<T> == true, + "T must be a floating point type"); + MOZ_ASSERT(min < max); + std::uniform_real_distribution<T> d( + min, std::nextafter(max, std::numeric_limits<T>::max())); + return d(FuzzingTraits::Rng()); +} + +/** + * RandomFloatingPoint returns a random floating-point number in 2**n + * increments. + */ +template <typename T> +T RandomFloatingPoint() { + static_assert(std::is_floating_point_v<T> == true, + "T must be a floating point type"); + int radix = RandomIntegerRange<int>(std::numeric_limits<T>::min_exponent, + std::numeric_limits<T>::max_exponent); + T x = static_cast<T>(pow(2.0, static_cast<double>(radix))); + return x * RandomFloatingPointRange<T>(-1.0, 1.0); +} + +} // namespace fuzzing +} // namespace mozilla + +#endif diff --git a/tools/fuzzing/common/moz.build b/tools/fuzzing/common/moz.build new file mode 100644 index 0000000000..afa19eddb4 --- /dev/null +++ b/tools/fuzzing/common/moz.build @@ -0,0 +1,11 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +SOURCES += ["FuzzingMutate.cpp", "FuzzingTraits.cpp"] + +EXPORTS += ["FuzzingMutate.h", "FuzzingTraits.h"] + +FINAL_LIBRARY = "xul" diff --git a/tools/fuzzing/docs/fuzzing_interface.rst b/tools/fuzzing/docs/fuzzing_interface.rst new file mode 100644 index 0000000000..561727d540 --- /dev/null +++ b/tools/fuzzing/docs/fuzzing_interface.rst @@ -0,0 +1,496 @@ +Fuzzing Interface +================= + +The fuzzing interface is glue code living in mozilla-central in order to +make it easier for developers and security researchers to test C/C++ +code with either `libFuzzer <https://llvm.org/docs/LibFuzzer.html>`__ or +`afl-fuzz <http://lcamtuf.coredump.cx/afl/>`__. + +These fuzzing tools, are based on *compile-time instrumentation* to measure +things like branch coverage and more advanced heuristics per fuzzing test. +Doing so allows these tools to progress through code with little to no custom +logic/knowledge implemented in the fuzzer itself. Usually, the only thing +these tools need is a code "shim" that provides the entry point for the fuzzer +to the code to be tested. We call this additional code a *fuzzing target* and +the rest of this manual describes how to implement and work with these targets. + +As for the tools used with these targets, we currently recommend the use of +libFuzzer over afl-fuzz, as the latter is no longer maintained while libFuzzer +is being actively developed. Furthermore, libFuzzer has some advanced +instrumentation features (e.g. value profiling to deal with complicated +comparisons in code), making it overall more effective. + +What can be tested? +~~~~~~~~~~~~~~~~~~~ + +The interface can be used to test all C/C++ code that either ends up in +``libxul`` (more precisely, the gtest version of ``libxul``) **or** is +part of the JS engine. + +Note that this is not the right testing approach for testing the full +browser as a whole. It is rather meant for component-based testing +(especially as some components cannot be easily separated out of the +full build). + +.. note:: + + **Note:** If you are working on the JS engine (trying to reproduce a + bug or seeking to develop a new fuzzing target), then please also read + the :ref:`JS Engine Specifics Section <JS Engine Specifics>` at the end + of this documentation, as the JS engine offers additional options for + implementing and running fuzzing targets. + + +Reproducing bugs for existing fuzzing targets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you are working on a bug that involves an existing fuzzing interface target, +you have two options for reproducing the issue: + + +Using existing builds +^^^^^^^^^^^^^^^^^^^^^ + +We have several fuzzing builds in CI that you can simply download. We recommend +using ``fuzzfetch`` for this purpose, as it makes downloading and unpacking +these builds much easier. + +You can install ``fuzzfetch`` from +`Github <https://github.com/MozillaSecurity/fuzzfetch>`__ or +`via pip <https://pypi.org/project/fuzzfetch/>`__. + +Afterwards, you can run + +:: + + $ python -m fuzzfetch -a --fuzzing --gtest -n firefox-fuzzing + +to fetch the latest optimized build. Alternatively, we offer non-ASan debug builds +which you can download using + +:: + + $ python -m fuzzfetch -d --fuzzing --gtest -n firefox-fuzzing + +In both commands, ``firefox-fuzzing`` indicates the name of the directory that +will be created for the download. + +Afterwards, you can reproduce the bug using + +:: + + $ FUZZER=TargetName firefox-fuzzing/firefox test.bin + +assuming that ``TargetName`` is the name of the fuzzing target specified in the +bug you are working on and ``test.bin`` is the attached testcase. + +.. note:: + + **Note:** You should not export the ``FUZZER`` variable permanently + in your shell, especially if you plan to do local builds. If the ``FUZZER`` + variable is exported, it will affect the build process. + +If the CI builds don't meet your requirements and you need a local build instead, +you can follow the steps below to create one: + +.. _Local build requirements and flags: + +Local build requirements and flags +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You will need a Linux environment with a recent Clang. Using the Clang downloaded +by ``./mach bootstrap`` or a newer version is recommended. + +The only build flag required to enable the fuzzing targets is ``--enable-fuzzing``, +so adding + +:: + + ac_add_options --enable-fuzzing + +to your ``.mozconfig`` is already sufficient for producing a fuzzing build. +However, for improved crash handling capabilities and to detect additional errors, +it is strongly recommended to combine libFuzzer with :ref:`AddressSanitizer <Address Sanitizer>` +at least for optimized builds and bugs requiring ASan to reproduce at all +(e.g. you are working on a bug where ASan reports a memory safety violation +of some sort). + +Once your build is complete, if you want to run gtests, you **must** additionally run + +:: + + $ ./mach gtest dontruntests + +to force the gtest libxul to be built. + +.. note:: + + **Note:** If you modify any code, please ensure that you run **both** build + commands to ensure that the gtest libxul is also rebuilt. It is a common mistake + to only run ``./mach build`` and miss the second command. + +Once these steps are complete, you can reproduce the bug locally using the same +steps as described above for the downloaded builds. + + +Developing new fuzzing targets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Developing a new fuzzing target using the fuzzing interface only requires a few steps. + + +Determine if the fuzzing interface is the right tool +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The fuzzing interface is not suitable for every kind of testing. In particular +if your testing requires the full browser to be running, then you might want to +look into other testing methods. + +The interface uses the ``ScopedXPCOM`` implementation to provide an environment +in which XPCOM is available and initialized. You can initialize further subsystems +that you might require, but you are responsible yourself for any kind of +initialization steps. + +There is (in theory) no limit as to how far you can take browser initialization. +However, the more subsystems are involved, the more problems might occur due to +non-determinism and loss of performance. + +If you are unsure if the fuzzing interface is the right approach for you or you +require help in evaluating what could be done for your particular task, please +don't hestitate to :ref:`contact us <Fuzzing#contact-us>`. + + +Develop the fuzzing code +^^^^^^^^^^^^^^^^^^^^^^^^ + +Where to put your fuzzing code +'''''''''''''''''''''''''''''' + +The code using the fuzzing interface usually lives in a separate directory +called ``fuzztest`` that is on the same level as gtests. If your component +has no gtests, then a subdirectory either in tests or in your main directory +will work. If such a directory does not exist yet in your component, then you +need to create one with a suitable ``moz.build``. See `the transport target +for an example <https://searchfox.org/mozilla-central/source/dom/media/webrtc/transport/fuzztest/moz.build>`__ + +In order to include the new subdirectory into the build process, you will +also have to modify the toplevel ``moz.build`` file accordingly. For this +purpose, you should add your directory to ``TEST_DIRS`` only if ``FUZZING_INTERFACES`` +is set. See again `the transport target for an example +<https://searchfox.org/mozilla-central/rev/de7676288a78b70d2b9927c79493adbf294faad5/media/mtransport/moz.build#18-24>`__. + +How your code should look like +'''''''''''''''''''''''''''''' + +In order to define your fuzzing target ``MyTarget``, you only need to implement 2 functions: + +1. A one-time initialization function. + + At startup, the fuzzing interface calls this function **once**, so this can + be used to perform one-time operations like initializing subsystems or parsing + extra fuzzing options. + + This function is the equivalent of the `LLVMFuzzerInitialize <https://llvm.org/docs/LibFuzzer.html#startup-initialization>`__ + function and has the same signature. However, with our fuzzing interface, + it won't be resolved by its name, so it can be defined ``static`` and called + whatever you prefer. Note that the function should always ``return 0`` and + can (except for the return), remain empty. + + For the sake of this documentation, we assume that you have ``static int FuzzingInitMyTarget(int* argc, char*** argv);`` + +2. The fuzzing iteration function. + + This is where the actual fuzzing happens, and this function is the equivalent + of `LLVMFuzzerTestOneInput <https://llvm.org/docs/LibFuzzer.html#fuzz-target>`__. + Again, the difference to the fuzzing interface is that the function won't be + resolved by its name. In addition, we offer two different possible signatures + for this function, either + + ``static int FuzzingRunMyTarget(const uint8_t* data, size_t size);`` + + or + + ``static int FuzzingRunMyTarget(nsCOMPtr<nsIInputStream> inputStream);`` + + The latter is just a wrapper around the first one for implementations that + usually work with streams. No matter which of the two signatures you choose + to work with, the only thing you need to implement inside the function + is the use of the provided data with your target implementation. This can + mean to simply feed the data to your target, using the data to drive operations + on the target API, or a mix of both. + + While doing so, you should avoid altering global state in a permanent way, + using additional sources of data/randomness or having code run beyond the + lifetime of the iteration function (e.g. on another thread), for one simple + reason: Coverage-guided fuzzing tools depend on the **deterministic** nature + of the iteration function. If the same input to this function does not lead + to the same execution when run twice (e.g. because the resulting state depends + on multiple successive calls or because of additional external influences), + then the tool will not be able to reproduce its fuzzing progress and perform + badly. Dealing with this restriction can be challenging e.g. when dealing + with asynchronous targets that run multi-threaded, but can usually be managed + by synchronizing execution on all threads at the end of the iteration function. + For implementations accumulating global state, it might be necessary to + (re)initialize this global state in each iteration, rather than doing it once + in the initialization function, even if this costs additional performance. + + Note that unlike the vanilla libFuzzer approach, you are allowed to ``return 1`` + in this function to indicate that an input is "bad". Doing so will cause + libFuzzer to discard the input, no matter if it generated new coverage or not. + This is particularly useful if you have means to internally detect and catch + bad testcase behavior such as timeouts/excessive resource usage etc. to avoid + these tests to end up in your corpus. + + +Once you have implemented the two functions, the only thing remaining is to +register them with the fuzzing interface. For this purpose, we offer two +macros, depending on which iteration function signature you used. If you +sticked to the classic signature using buffer and size, you can simply use + +:: + + #include "FuzzingInterface.h" + + // Your includes and code + + MOZ_FUZZING_INTERFACE_RAW(FuzzingInitMyTarget, FuzzingRunMyTarget, MyTarget); + +where ``MyTarget`` is the name of the target and will be used later to decide +at runtime which target should be used. + +If instead you went for the streaming interface, you need a different include, +but the macro invocation is quite similar: + +:: + + #include "FuzzingInterfaceStream.h" + + // Your includes and code + + MOZ_FUZZING_INTERFACE_STREAM(FuzzingInitMyTarget, FuzzingRunMyTarget, MyTarget); + +For a live example, see also the `implementation of the STUN fuzzing target +<https://searchfox.org/mozilla-central/source/dom/media/webrtc/transport/fuzztest/stun_parser_libfuzz.cpp>`__. + +Add instrumentation to the code being tested +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +libFuzzer requires that the code you are trying to test is instrumented +with special compiler flags. Fortunately, adding these on a per-directory basis +can be done just by including the following directive in each ``moz.build`` +file that builds code under test: + +:: + + # Add libFuzzer configuration directives + include('/tools/fuzzing/libfuzzer-config.mozbuild') + + +The include already does the appropriate configuration checks to be only +active in fuzzing builds, so you don't have to guard this in any way. + +.. note:: + + **Note:** This include modifies `CFLAGS` and `CXXFLAGS` accordingly + but this only works for source files defined in this particular + directory. The flags are **not** propagated to subdirectories automatically + and you have to ensure that each directory that builds source files + for your target has the include added to its ``moz.build`` file. + +By keeping the instrumentation limited to the parts that are actually being +tested using this tool, you not only increase the performance but also potentially +reduce the amount of noise that libFuzzer sees. + + +Build your code +^^^^^^^^^^^^^^^ + +See the :ref:`Build instructions above <Local build requirements and flags>` for instructions +how to modify your ``.mozconfig`` to create the appropriate build. + + +Running your code and building a corpus +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You need to set the following environment variable to enable running the +fuzzing code inside Firefox instead of the regular browser. + +- ``FUZZER=name`` + +Where ``name`` is the name of your fuzzing module that you specified +when calling the ``MOZ_FUZZING_INTERFACE_RAW`` macro. For the example +above, this would be ``MyTarget`` or ``StunParser`` for the live example. + +Now when you invoke the firefox binary in your build directory with the +``-help=1`` parameter, you should see the regular libFuzzer help. On +Linux for example: + +:: + + $ FUZZER=StunParser obj-asan/dist/bin/firefox -help=1 + +You should see an output similar to this: + +:: + + Running Fuzzer tests... + Usage: + + To run fuzzing pass 0 or more directories. + obj-asan/dist/bin/firefox [-flag1=val1 [-flag2=val2 ...] ] [dir1 [dir2 ...] ] + + To run individual tests without fuzzing pass 1 or more files: + obj-asan/dist/bin/firefox [-flag1=val1 [-flag2=val2 ...] ] file1 [file2 ...] + + Flags: (strictly in form -flag=value) + verbosity 1 Verbosity level. + seed 0 Random seed. If 0, seed is generated. + runs -1 Number of individual test runs (-1 for infinite runs). + max_len 0 Maximum length of the test input. If 0, libFuzzer tries to guess a good value based on the corpus and reports it. + ... + + +Reproducing a Crash +''''''''''''''''''' + +In order to reproduce a crash from a given test file, simply put the +file as the only argument on the command line, e.g. + +:: + + $ FUZZER=StunParser obj-asan/dist/bin/firefox test.bin + +This should reproduce the given problem. + + +FuzzManager and libFuzzer +''''''''''''''''''''''''' + +Our FuzzManager project comes with a harness for running libFuzzer with +an optional connection to a FuzzManager server instance. Note that this +connection is not mandatory, even without a server you can make use of +the local harness. + +You can find the harness +`here <https://github.com/MozillaSecurity/FuzzManager/tree/master/misc/afl-libfuzzer>`__. + +An example invocation for the harness to use with StunParser could look +like this: + +:: + + FUZZER=StunParser python /path/to/afl-libfuzzer-daemon.py --fuzzmanager \ + --stats libfuzzer-stunparser.stats --libfuzzer-auto-reduce-min 500 --libfuzzer-auto-reduce 30 \ + --tool libfuzzer-stunparser --libfuzzer --libfuzzer-instances 6 obj-asan/dist/bin/firefox \ + -max_len=256 -use_value_profile=1 -rss_limit_mb=3000 corpus-stunparser + +What this does is + +- run libFuzzer on the ``StunParser`` target with 6 parallel instances + using the corpus in the ``corpus-stunparser`` directory (with the + specified libFuzzer options such as ``-max_len`` and + ``-use_value_profile``) +- automatically reduce the corpus and restart if it grew by 30% (and + has at least 500 files) +- use FuzzManager (need a local ``.fuzzmanagerconf`` and a + ``firefox.fuzzmanagerconf`` binary configuration as described in the + FuzzManager manual) and submit crashes as ``libfuzzer-stunparser`` + tool +- write statistics to the ``libfuzzer-stunparser.stats`` file + +.. _JS Engine Specifics: + +JS Engine Specifics +~~~~~~~~~~~~~~~~~~~ + +The fuzzing interface can also be used for testing the JS engine, in fact there +are two separate options to implement and run fuzzing targets: + +Implementing in C++ +^^^^^^^^^^^^^^^^^^^ + +Similar to the fuzzing interface in Firefox, you can implement your target in +entirely C++ with very similar interfaces compared to what was described before. + +There are a few minor differences though: + +1. All of the fuzzing targets live in `js/src/fuzz-tests`. + +2. All of the code is linked into a separate binary called `fuzz-tests`, + similar to how all JSAPI tests end up in `jsapi-tests`. In order for this + binary to be built, you must build a JS shell with ``--enable-fuzzing`` + **and** ``--enable-tests``. Again, this can and should be combined with + AddressSanitizer for maximum effectiveness. This also means that there is no + need to (re)build gtests when dealing with a JS fuzzing target and using + a shell as part of a full browser build. + +3. The harness around the JS implementation already provides you with an + initialized ``JSContext`` and global object. You can access these in + your target by declaring + + ``extern JS::PersistentRootedObject gGlobal;`` + + and + + ``extern JSContext* gCx;`` + + but there is no obligation for you to use these. + +For a live example, see also the `implementation of the StructuredCloneReader target +<https://searchfox.org/mozilla-central/source/js/src/fuzz-tests/testStructuredCloneReader.cpp>`__. + + +Implementing in JS +^^^^^^^^^^^^^^^^^^ + +In addition to the C++ targets, you can also implement targets in JavaScript +using the JavaScript Runtime (JSRT) fuzzing approach. Using this approach is +not only much simpler (since you don't need to know anything about the +JSAPI or engine internals), but it also gives you full access to everything +defined in the JS shell, including handy functions such as ``timeout()``. + +Of course, this approach also comes with disadvantages: Calling into JS and +performing the fuzzing operations there costs performance. Also, there is more +chance for causing global side-effects or non-determinism compared to a +fairly isolated C++ target. + +As a rule of thumb, you should implement the target in JS if + +* you don't know C++ and/or how to use the JSAPI (after all, a JS fuzzing target is better than none), +* your target is expected to have lots of hangs/timeouts (you can catch these internally), +* or your target is not isolated enough for a C++ target and/or you need specific JS shell functions. + + +There is an `example target <https://searchfox.org/mozilla-central/source/js/src/shell/jsrtfuzzing/jsrtfuzzing-example.js>`__ +in-tree that shows roughly how to implement such a fuzzing target. + +To run such a target, you must run the ``js`` (shell) binary instead of the +``fuzz-tests`` binary and point the ``FUZZER`` variable to the file containing +your fuzzing target, e.g. + +:: + + $ FUZZER=/path/to/jsrtfuzzing-example.js obj-asan/dist/bin/js --fuzzing-safe --no-threads -- <libFuzzer options here> + +More elaborate targets can be found in `js/src/fuzz-tests/ <https://searchfox.org/mozilla-central/source/js/src/fuzz-tests/>`__. + +Troubleshooting +~~~~~~~~~~~~~~~ + + +Fuzzing Interface: Error: No testing callback found +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This error means that the fuzzing callback with the name you specified +using the ``FUZZER`` environment variable could not be found. Reasons +for are typically either a misspelled name or that your code wasn't +built (check your ``moz.build`` file and build log). + + +``mach build`` doesn't seem to update my fuzzing code +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Keep in mind you always need to run both the ``mach build`` and +``mach gtest dontruntests`` commands in order to update your fuzzing +code. The latter rebuilds the gtest version of ``libxul``, containing +your code. diff --git a/tools/fuzzing/docs/index.rst b/tools/fuzzing/docs/index.rst new file mode 100644 index 0000000000..9a4e2d01c4 --- /dev/null +++ b/tools/fuzzing/docs/index.rst @@ -0,0 +1,438 @@ +Fuzzing +======= + +.. toctree:: + :maxdepth: 1 + :hidden: + :glob: + :reversed: + + * + +This section focuses on explaining the software testing technique called +“Fuzzing” or “Fuzz Testing” and its application to the Mozilla codebase. +The overall goal is to educate developers about the capabilities and +usefulness of fuzzing and also allow them to write their own fuzzing +targets. Note that not all fuzzing tools used at Mozilla are open +source. Some tools are for internal use only because they can easily +find critical security vulnerabilities. + +What is Fuzzing? +---------------- + +Fuzzing (or Fuzz Testing) is a technique to randomly use a program or +parts of it with the goal to uncover bugs. Random usage can have a wide +variety of forms, a few common ones are + +- random input data (e.g. file formats, network data, source code, etc.) + +- random API usage + +- random UI interaction + +with the first two being the most practical methods used in the field. +Of course, these methods are not entirely separate, combinations are +possible. Fuzzing is a great way to find quality issues, some of them +being also security issues. + +Random input data +~~~~~~~~~~~~~~~~~ + +This is probably the most obvious fuzzing method: You have code that +processes data and you provide it with random or mutated data, hoping +that it will uncover bugs in your implementation. Examples are media +formats like JPEG or H.264, but basically anything that involves +processing a “blob” of data can be a valuable target. Countless security +vulnerabilities in a variety of libraries and programs have been found +using this method (the AFLFuzz +`bug-o-rama <http://lcamtuf.coredump.cx/afl/#bugs>`__ gives a good +impression). + +Common tools for this task are e.g. +`libFuzzer <https://llvm.org/docs/LibFuzzer.html>`__ and +`AFLFuzz <http://lcamtuf.coredump.cx/afl/>`__, but also specialized +tools with custom logic like +`LangFuzz <https://www.usenix.org/system/files/conference/usenixsecurity12/sec12-final73.pdf>`__ +and `Avalanche <https://github.com/MozillaSecurity/avalanche>`__. + +Random API Usage +~~~~~~~~~~~~~~~~ + +Randomly testing APIs is especially helpful with parts of software that +expose a well-defined interface (see also :ref:`Well-defined +behavior and Safety <Well defined behaviour and safety>`). If this interface is additionally exposed to +untrusted parties/content, then this is a strong sign that random API +testing would be worthwhile here, also for security reasons. APIs can be +anything from C++ layer code to APIs offered in the browser. + +A good example for a fuzzing target here is the DOM (Document Object +Model) and various other browser APIs. The browser exposes a variety of +different APIs for working with documents, media, communication, +storage, etc. with a growing complexity. Each of these APIs has +potential bugs that can be uncovered with fuzzing. At Mozilla, we +currently use domino (internal tool) for this purpose. + +Random UI Interaction +~~~~~~~~~~~~~~~~~~~~~ + +A third way to test programs and in particular user interfaces is by +directly interacting with the UI in a random way, typically in +combination with other actions the program has to perform. Imagine for +example an automated browser that surfs through the web and randomly +performs actions such as scrolling, zooming and clicking links. The nice +thing about this approach is that you likely find many issues that the +end-user also experiences. However, this approach typically suffers from +bad reproducibility (see also :ref:`Reproducibility <Reproducibility>`) and is therefore +often of limited use. + +An example for a fuzzing tool using this technique is `Android +Monkey <https://developer.android.com/studio/test/monkey>`__. At +Mozilla however, we currently don’t make much use of this approach. + +Why Fuzzing Helps You +--------------------- + +Understanding the value of fuzzing for you as a developer and software +quality in general is important to justify the support this testing +method might need from you. When your component is fuzzed for the first +time there are two common things you will be confronted with: + +**Bug reports that don’t seem real bugs or not important:** Fuzzers +find all sorts of bugs in various corners of your component, even +obscure ones. This automatically leads to a larger number of bugs that +either don’t seem to be bugs (see also the :ref:`Well-defined behavior and +safety <Well defined behaviour and safety>` section below) or that don’t seem to be important bugs. + +Fixing these bugs is still important for the fuzzers because ignoring them +in fuzzing costs resources (performance, human resources) and might even +prevent the fuzzer from hitting other bugs. For example certain fuzzing tools +like libFuzzer run in-process and have to restart on every crash, involving a +costly re-read of the fuzzing samples. + +Also, as some of our code evolves quickly, a corner case might become a +hot code path in a few months. + +**New steps to reproduce:** Fuzzing tools are very likely to exercise +your component using different methods than an average end-user. A +common technique is modify existing parts of a program or write entirely +new code to yield a fuzzing "target". This target is specifically +designed to work with the fuzzing tools in use. Reproducing the reported +bugs might require you to learn these new steps to reproduce, including +building/acquiring that target and having the right environment. + +Both of these issues might seem like a waste of time in some cases, +however, realizing that both steps are a one-time investment for a +constant stream of valuable bug reports is paramount here. Helping your +security engineers to overcome these issues will ensure that future +regressions in your code can be detected at an earlier stage and in a +form that is more easily actionable. Especially if you are dealing with +regressions in your code already, fuzzing has the potential to make your +job as a developer easier. + +One of the best examples at Mozilla is the JavaScript engine. The JS +team has put great quite some effort into getting fuzzing started and +supporting our work. Here’s what Jan de Mooij, a senior platform +engineer for the JavaScript engine, has to say about it: + +*“Bugs in the engine can cause mysterious browser crashes and bugs that +are incredibly hard to track down. Fortunately, we don't have to deal +with these time consuming browser issues very often: usually the fuzzers +find a reliable shell test long before the bug makes it into a release. +Fuzzing is invaluable to us and I cannot imagine working on this project +without it.”* + +Levels of Fuzzing in Firefox/Gecko +---------------------------------- + +Applying fuzzing to e.g. Firefox happens at different "levels", similar +to the different types of automated tests we have: + +Full Browser Fuzzing +~~~~~~~~~~~~~~~~~~~~ + +The most obvious method of testing would be to test the full browser and +doing so is required for certain features like the DOM and other APIs. +The advantage here is that we have all the features of the browser +available and testing happens closely to what we actually ship. The +downside here though is that browser testing is by far the slowest of +all testing methods. In addition, it has the most amount of +non-determinism involved (resulting e.g. in intermittent testcases). +Browser fuzzing at Mozilla is largely done with the `Grizzly +framework <https://blog.mozilla.org/security/2019/07/10/grizzly/>`__ +(`meta bug <https://bugzilla.mozilla.org/show_bug.cgi?id=grizzly>`__) +and one of the most successful fuzzers is the Domino tool (`meta +bug <https://bugzilla.mozilla.org/show_bug.cgi?id=domino>`__). + +Summarizing, full browser fuzzing is the right technique to investigate +if your feature really requires it. Consider using other methods (see +below) if your code can be exercised in this way. + +The Fuzzing Interface +~~~~~~~~~~~~~~~~~~~~~ + +**Fuzzing Interface** + +The fuzzing interface is glue code living in mozilla-central in order to make it +easier for developers and security researchers to test C/C++ code with either libFuzzer or afl-fuzz. + +This interface offers a gtest (C++ unit test) level component based +fuzzing approach and is suitable for anything that could also be +tested/exercised using a gtest. This method is by far the fastest, but +usually limited to testing isolated components that can be instantiated +on this level. Utilizing this method requires you to write a fuzzing +target similar to writing a gtest. This target will automatically be +usable with libFuzzer and AFLFuzz. We offer a :ref:`comprehensive manual <Fuzzing Interface>` +that describes how to write and utilize your own target. + +A simple example here is the `SDP parser +target <https://searchfox.org/mozilla-central/rev/efdf9bb55789ea782ae3a431bda6be74a87b041e/media/webrtc/signaling/fuzztest/sdp_parser_libfuzz.cpp#30>`__, +which tests the SipccSdpParser in our codebase. + +Shell-based Fuzzing +~~~~~~~~~~~~~~~~~~~ + +Some of our fuzzing, e.g. JS Engine testing, happens in a separate shell +program. For JS, this is the JS shell also used for most of the JS tests +and development. In theory, xpcshell could also be used for testing but +so far, there has not been a use case for this (most things that can be +reached through xpcshell can also be tested on the gtest level). + +Identifying the right level of fuzzing is the first step towards +continuous fuzz testing of your code. + +Code/Process Requirements for Fuzzing +------------------------------------- + +In this section, we are going to discuss how code should be written in +order to yield optimal results with fuzzing. + +Defect Oracles +~~~~~~~~~~~~~~ + +Fuzzing is only effective if you are able to know when a problem has +been found. Crashes are typically problems if the unit being tested is +safe for fuzzing (see Well-defined behavior and Safety). But there are +many more problems that you would want to find, correctness issues, +corruptions that don’t necessarily crash etc. For this, you need an +*oracle* that tells you something is wrong. + +The simplest defect oracle is the assertion (ex: ``MOZ_ASSERT``). +Assertions are a very powerful instrument because they can be used to +determine if your program is performing correctly, even if the bug would +not lead to any sort of crash. They can encode arbitrarily complex +information about what is considered correct, information that might +otherwise only exist in the developers’ minds. + +External tools like the sanitizers (AddressSanitizer aka ASan, +ThreadSanitizer aka TSan, MemorySanitizer aka MSan and +UndefinedBehaviorSanitizer - UBSan) can also serve as oracles for +sometimes severe issues that would not necessarily crash. Making sure +that these tools can be used on your code is highly useful. + +Examples for bugs found with sanitizers are `bug +1419608 <https://bugzilla.mozilla.org/show_bug.cgi?id=1419608>`__, +`bug 1580288 <https://bugzilla.mozilla.org/show_bug.cgi?id=1580288>`__ +and `bug 922603 <https://bugzilla.mozilla.org/show_bug.cgi?id=922603>`__, +but since we started using sanitizers, we have found over 1000 bugs with +these tools. + +Another defect oracle can be a reference implementation. Comparing +program behavior (typically output) between two programs or two modes of +the same program that should produce the same outputs can find complex +correctness issues. This method is often called differential testing. + +One example where this is regularly used to find issues is the Mozilla +JavaScript engine: Running random programs with and without JIT +compilation enabled finds lots of problems with the JIT implementation. +One example for such a bug is `Bug +1404636 <https://bugzilla.mozilla.org/show_bug.cgi?id=1404636>`__. + +Component Decoupling +~~~~~~~~~~~~~~~~~~~~ + +Being able to test components in isolation can be an advantage for +fuzzing (both for performance and reproducibility). Clear boundaries +between different components and documentation that explains the +contracts usually help with this goal. Sometimes it might be useful to +mock a certain component that the target component is interacting with +and that is much harder if the components are tightly coupled and their +contracts unclear. Of course, this does not mean that one should only +test components in isolation. Sometimes, testing the interaction between +them is even desirable and does not hurt performance at all. + +Avoiding external I/O +~~~~~~~~~~~~~~~~~~~~~ + +External I/O like network or file interactions are bad for performance +and can introduce additional non-determinism. Providing interfaces to +process data directly from memory instead is usually much more helpful. + +.. _Well defined behaviour and safety: + +Well-defined Behavior and Safety +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This requirement mostly ties in where defect oracles ended and is one of +the most important problems seen in the wild nowadays with fuzzing. If a +part of your program’s behavior is unspecified, then this potentially +leads to bad times if the behavior is considered a defect by fuzzing. +For example, if your code has crashes that are not considered bugs, then +your code might be unsuitable for fuzzing. Your component should be +fuzzing safe, meaning that any defect oracle (e.g. assertion or crash) +triggered by the fuzzer is considered a bug. This important aspect is +often neglected. Be aware that any false positives cause both +performance degradation and additional manual work for your fuzzing +team. The Mozilla JS developers for example have implemented this +concept in a “--fuzzing-safe” switch which disables harmful functions. +Sometimes, crashes cannot be avoided for handling certain error +conditions. In such situations, it is important to mark these crashes in +a way the fuzzer can recognize and distinguish them from undesired +crashes. However, keep in mind that crashes in general can be disruptive +to the fuzzing process. Performance is an important aspect of fuzzing +and frequent crashes can severely degrade performance. + +.. _Reproducibility: + +Reproducibility +~~~~~~~~~~~~~~~ + +Being able to reproduce issues found with fuzzing is necessary for +several reasons: First, you as the developer probably want a test that +reproduces the issue so you can debug it better. Our feedback from most +developers is that traces without a reproducible test can help to find a +problem, but it makes the whole process very complicated. Some of these +non-reproducible bugs never get fixed. Second, having a reproducible +test also helps the triage process by allowing an automated bisection to +find the responsible developer. Last but not least, the test can be +added to a test suite, used for automated verification of fixes and even +serve as a basis for more fuzzing. + +Adding functionality to the program that improve reproducibility is +therefore a good idea in case non-reproducible issues are found. Some +examples are shown in the next section. + +While many problems with reproducibility are specific for the project +you are working on, there is one source of these problems that many +programs have in common: Threading. While some bugs only occur in the +first place due to concurrency, some other bugs would be perfectly +reproducible without threads, but are intermittent and hard to with +threading enabled. If the bug is indeed caused by a data race, then +tools like ThreadSanitizer will help and we are currently working on +making ThreadSanitizer usable on Firefox. For bugs that are not caused +by threading, it sometimes makes sense to be able to disable threading +or limit the amount of worker threads involved. + +Supporting Code +~~~~~~~~~~~~~~~ + +Some possibilities of what support implementations for fuzzing can do +have already been named in the previous sections: Additional defect +oracles and functionality to improve reproducibility and safety. In +fact, many features added specifically for fuzzing fit into one of these +categories. However, there’s room for more: Often, there are ways to +make it easier for fuzzers to exercise complex and hard to reach parts +of your code. For example, if a certain optimization feature is only +turned on under very specific conditions (that are not a requirement for +the optimization), then it makes sense to add a functionality to force +it on. Then, a fuzzer can hit the optimization code much more +frequently, increasing the chance to find issues. Some examples from +Firefox and SpiderMonkey: + +- The `FuzzingFunctions <https://searchfox.org/mozilla-central/rev/efdf9bb55789ea782ae3a431bda6be74a87b041e/dom/webidl/FuzzingFunctions.webidl#15>`__ + interface in the browser allows fuzzing tools to perform GC/CC, tune various + settings related to garbage collection or enable features like accessibility + mode. Being able to force a garbage collection at a specific time helped + identifying lots of problems in the past. + +- The --ion-eager and --baseline-eager flags for the JS shell force JIT + compilation at various stages, rather than using the builtin + heuristic to enable it only for hot functions. + +- The --no-threads flag disables all threading (if possible) in the JS shell. + This makes some bugs reproduce deterministically that would otherwise be + intermittent and harder to find. However, some bugs that only occur with + threading can’t be found with this option enabled. + +Another important feature that must be turned off for fuzzing is +checksums. Many file formats use checksums to validate a file before +processing it. If a checksum feature is still enabled, fuzzers are +likely never going to produce valid files. The same often holds for +cryptographic signatures. Being able to turn off the validation of these +features as part of a fuzzing switch is extremely helpful. + +An example for such a checksum can be found in the +`FlacDemuxer <https://searchfox.org/mozilla-central/rev/efdf9bb55789ea782ae3a431bda6be74a87b041e/dom/media/flac/FlacDemuxer.cpp#494>`__. + +Test Samples +~~~~~~~~~~~~ + +Some fuzzing strategies make use of existing data that is mutated to +produce the new random data. In fact, mutation-based strategies are +typically superior to others if the original samples are of good quality +because the originals carry a lot of semantics that the fuzzer does not +have to know about or implement. However, success here really stands and +falls with the quality of the samples. If the originals don’t cover +certain parts of the implementation, then the fuzzer will also have to +do more work to get there. + + +Fuzz Blockers +~~~~~~~~~~~~~ + +Fuzz blockers are issues that prevent fuzzers from being as +effective as possible. Depending on the fuzzer and its scope a fuzz blocker +in one area (or component) can impede performance in other areas and in +some cases block the fuzzer all together. Some examples are: + +- Frequent crashes - These can block code paths and waste compute + resources due to the need to relaunch the fuzzing target and handle + the results (regardless of whether it is ignored or reported). This can also + include assertions that are mostly benign in many cases are but easily + triggered by fuzzers. + +- Frequent hangs / timeouts - This includes any issue that slows down + or blocks execution of the fuzzer or the target. + +- Hard to bucket - This includes crashes such as stack overflows or any issue + that crashes in an inconsistent location. This also includes issues that + corrupt logs/debugger output or provide a broken/invalid crash report. + +- Broken builds - This is fairly straightforward, without up-to-date builds + fuzzers are unable to run or verify fixes. + +- Missing instrumentation - In some cases tools such as ASan are used as + defect oracles and are required by the fuzzing tools to allow for proper + automation. In other cases incomplete instrumentation can give a false sense + of stability or make investigating issues much more time consuming. Although + this is not necessarily blocking the fuzzers it should be prioritized + appropriately. + +Since these types of crashes harm the overall fuzzing progress, it is important +for them to be addressed in a timely manner. Even if the bug itself might seem +trivial and low priority for the product, it can still have devastating effects +on fuzzing and hence prevent finding other critical issues. + +Issues in Bugzilla are marked as fuzz blockers by adding “[fuzzblocker]” +to the “Whiteboard” field. A list of open issues marked as fuzz blockers +can be found on `Bugzilla <https://bugzilla.mozilla.org/buglist.cgi?cmdtype=dorem&remaction=run&namedcmd=fuzzblockers&sharer_id=486634>`__. + + +Documentation +~~~~~~~~~~~~~ + +It is important for the fuzzing team to know how your software, tests +and designs work. Even obvious tasks, like how a test program is +supposed to be invoked, which options are safe, etc. might be hard to +figure out for the person doing the testing, just as you are reading +this manual right now to find out what is important in fuzzing. + +Contact Us +~~~~~~~~~~ + +The fuzzing team can be reached at +`fuzzing@mozilla.com <mailto:fuzzing@mozilla.com>`__ or +`on Matrix <https://chat.mozilla.org/#/room/#fuzzing:mozilla.org>`__ +and will be happy to help you with any questions about fuzzing +you might have. We can help you find the right method of fuzzing for +your feature, collaborate on the implementation and provide the +infrastructure to run it and process the results accordingly. diff --git a/tools/fuzzing/interface/FuzzingInterface.cpp b/tools/fuzzing/interface/FuzzingInterface.cpp new file mode 100644 index 0000000000..f06ca68656 --- /dev/null +++ b/tools/fuzzing/interface/FuzzingInterface.cpp @@ -0,0 +1,30 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Common code for the unified fuzzing interface + */ + +#include <stdarg.h> +#include <stdlib.h> +#include "FuzzingInterface.h" + +namespace mozilla { + +#ifdef JS_STANDALONE +static bool fuzzing_verbose = !!getenv("MOZ_FUZZ_LOG"); +void fuzzing_log(const char* aFmt, ...) { + if (fuzzing_verbose) { + va_list ap; + va_start(ap, aFmt); + vfprintf(stderr, aFmt, ap); + va_end(ap); + } +} +#else +LazyLogModule gFuzzingLog("nsFuzzing"); +#endif + +} // namespace mozilla diff --git a/tools/fuzzing/interface/FuzzingInterface.h b/tools/fuzzing/interface/FuzzingInterface.h new file mode 100644 index 0000000000..792f0809ec --- /dev/null +++ b/tools/fuzzing/interface/FuzzingInterface.h @@ -0,0 +1,115 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Interface definitions for the unified fuzzing interface + */ + +#ifndef FuzzingInterface_h__ +#define FuzzingInterface_h__ + +#include <fstream> + +#ifdef LIBFUZZER +# include "FuzzerExtFunctions.h" +#endif + +#include "FuzzerRegistry.h" +#include "mozilla/Assertions.h" + +#ifndef JS_STANDALONE +# include "mozilla/Logging.h" +#endif + +namespace mozilla { + +#ifdef JS_STANDALONE +void fuzzing_log(const char* aFmt, ...); +# define MOZ_LOG_EXPAND_ARGS(...) __VA_ARGS__ + +# define FUZZING_LOG(args) fuzzing_log(MOZ_LOG_EXPAND_ARGS args); +#else +extern LazyLogModule gFuzzingLog; + +# define FUZZING_LOG(args) \ + MOZ_LOG(mozilla::gFuzzingLog, mozilla::LogLevel::Verbose, args) +#endif // JS_STANDALONE + +typedef int (*FuzzingTestFuncRaw)(const uint8_t*, size_t); + +#ifdef AFLFUZZ + +static int afl_interface_raw(const char* testFile, + FuzzingTestFuncRaw testFunc) { + char* buf = NULL; + + while (__AFL_LOOP(1000)) { + std::ifstream is; + is.open(testFile, std::ios::binary); + is.seekg(0, std::ios::end); + int len = is.tellg(); + is.seekg(0, std::ios::beg); + MOZ_RELEASE_ASSERT(len >= 0); + if (!len) { + is.close(); + continue; + } + buf = (char*)realloc(buf, len); + MOZ_RELEASE_ASSERT(buf); + is.read(buf, len); + is.close(); + testFunc((uint8_t*)buf, (size_t)len); + } + + free(buf); + + return 0; +} + +# define MOZ_AFL_INTERFACE_COMMON() \ + char* testFilePtr = getenv("MOZ_FUZZ_TESTFILE"); \ + if (!testFilePtr) { \ + fprintf(stderr, \ + "Must specify testfile in MOZ_FUZZ_TESTFILE environment " \ + "variable.\n"); \ + return 1; \ + } \ + /* Make a copy of testFilePtr so the testing function can safely call \ + * getenv \ + */ \ + std::string testFile(testFilePtr); + +# define MOZ_AFL_INTERFACE_RAW(initFunc, testFunc, moduleName) \ + static int afl_fuzz_##moduleName(const uint8_t* data, size_t size) { \ + MOZ_RELEASE_ASSERT(data == NULL && size == 0); \ + MOZ_AFL_INTERFACE_COMMON(); \ + return ::mozilla::afl_interface_raw(testFile.c_str(), testFunc); \ + } \ + static void __attribute__((constructor)) AFLRegister##moduleName() { \ + ::mozilla::FuzzerRegistry::getInstance().registerModule( \ + #moduleName, initFunc, afl_fuzz_##moduleName); \ + } +#else +# define MOZ_AFL_INTERFACE_RAW(initFunc, testFunc, moduleName) /* Nothing */ +#endif // AFLFUZZ + +#ifdef LIBFUZZER +# define MOZ_LIBFUZZER_INTERFACE_RAW(initFunc, testFunc, moduleName) \ + static void __attribute__((constructor)) LibFuzzerRegister##moduleName() { \ + ::mozilla::FuzzerRegistry::getInstance().registerModule( \ + #moduleName, initFunc, testFunc); \ + } +#else +# define MOZ_LIBFUZZER_INTERFACE_RAW(initFunc, testFunc, \ + moduleName) /* Nothing */ +#endif + +#define MOZ_FUZZING_INTERFACE_RAW(initFunc, testFunc, moduleName) \ + MOZ_LIBFUZZER_INTERFACE_RAW(initFunc, testFunc, moduleName); \ + MOZ_AFL_INTERFACE_RAW(initFunc, testFunc, moduleName); + +} // namespace mozilla + +#endif // FuzzingInterface_h__ diff --git a/tools/fuzzing/interface/FuzzingInterfaceStream.cpp b/tools/fuzzing/interface/FuzzingInterfaceStream.cpp new file mode 100644 index 0000000000..f2c5c891e9 --- /dev/null +++ b/tools/fuzzing/interface/FuzzingInterfaceStream.cpp @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Interface implementation for the unified fuzzing interface + */ + +#include "nsIFile.h" +#include "nsIPrefService.h" +#include "nsIProperties.h" + +#include "FuzzingInterfaceStream.h" + +#include "mozilla/Assertions.h" + +#ifndef JS_STANDALONE +# include "nsNetUtil.h" +#endif + +namespace mozilla { + +#ifdef AFLFUZZ + +void afl_interface_stream(const char* testFile, + FuzzingTestFuncStream testFunc) { + nsresult rv; + nsCOMPtr<nsIProperties> dirService = + do_GetService(NS_DIRECTORY_SERVICE_CONTRACTID); + MOZ_RELEASE_ASSERT(dirService != nullptr); + nsCOMPtr<nsIFile> file; + rv = dirService->Get(NS_OS_CURRENT_WORKING_DIR, NS_GET_IID(nsIFile), + getter_AddRefs(file)); + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); + file->AppendNative(nsDependentCString(testFile)); + while (__AFL_LOOP(1000)) { + nsCOMPtr<nsIInputStream> inputStream; + rv = NS_NewLocalFileInputStream(getter_AddRefs(inputStream), file); + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); + if (!NS_InputStreamIsBuffered(inputStream)) { + nsCOMPtr<nsIInputStream> bufStream; + rv = NS_NewBufferedInputStream(getter_AddRefs(bufStream), + inputStream.forget(), 1024); + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); + inputStream = bufStream; + } + testFunc(inputStream.forget()); + } +} + +#endif + +} // namespace mozilla diff --git a/tools/fuzzing/interface/FuzzingInterfaceStream.h b/tools/fuzzing/interface/FuzzingInterfaceStream.h new file mode 100644 index 0000000000..1542020794 --- /dev/null +++ b/tools/fuzzing/interface/FuzzingInterfaceStream.h @@ -0,0 +1,90 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Interface definitions for the unified fuzzing interface with streaming + * support + */ + +#ifndef FuzzingInterfaceStream_h__ +#define FuzzingInterfaceStream_h__ + +#ifdef JS_STANDALONE +# error "FuzzingInterfaceStream.h cannot be used in JS standalone builds." +#endif + +#include "gtest/gtest.h" +#include "nsComponentManagerUtils.h" +#include "nsCOMPtr.h" +#include "nsIInputStream.h" + +#include "nsDirectoryServiceDefs.h" +#include "nsStreamUtils.h" +#include "nsStringStream.h" + +#include <fstream> + +#include "FuzzingInterface.h" + +namespace mozilla { + +typedef int (*FuzzingTestFuncStream)(nsCOMPtr<nsIInputStream>); + +#ifdef AFLFUZZ +void afl_interface_stream(const char* testFile, FuzzingTestFuncStream testFunc); + +# define MOZ_AFL_INTERFACE_COMMON(initFunc) \ + if (initFunc) initFunc(NULL, NULL); \ + char* testFilePtr = getenv("MOZ_FUZZ_TESTFILE"); \ + if (!testFilePtr) { \ + fprintf(stderr, \ + "Must specify testfile in MOZ_FUZZ_TESTFILE environment " \ + "variable.\n"); \ + return; \ + } \ + /* Make a copy of testFilePtr so the testing function can safely call \ + * getenv \ + */ \ + std::string testFile(testFilePtr); + +# define MOZ_AFL_INTERFACE_STREAM(initFunc, testFunc, moduleName) \ + TEST(AFL, moduleName) \ + { \ + MOZ_AFL_INTERFACE_COMMON(initFunc); \ + ::mozilla::afl_interface_stream(testFile.c_str(), testFunc); \ + } +#else +# define MOZ_AFL_INTERFACE_STREAM(initFunc, testFunc, moduleName) /* Nothing \ + */ +#endif + +#ifdef LIBFUZZER +# define MOZ_LIBFUZZER_INTERFACE_STREAM(initFunc, testFunc, moduleName) \ + static int LibFuzzerTest##moduleName(const uint8_t* data, size_t size) { \ + if (size > INT32_MAX) return 0; \ + nsCOMPtr<nsIInputStream> stream; \ + nsresult rv = NS_NewByteInputStream(getter_AddRefs(stream), \ + Span((const char*)data, size), \ + NS_ASSIGNMENT_DEPEND); \ + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); \ + testFunc(stream.forget()); \ + return 0; \ + } \ + static void __attribute__((constructor)) LibFuzzerRegister##moduleName() { \ + ::mozilla::FuzzerRegistry::getInstance().registerModule( \ + #moduleName, initFunc, LibFuzzerTest##moduleName); \ + } +#else +# define MOZ_LIBFUZZER_INTERFACE_STREAM(initFunc, testFunc, \ + moduleName) /* Nothing */ +#endif + +#define MOZ_FUZZING_INTERFACE_STREAM(initFunc, testFunc, moduleName) \ + MOZ_LIBFUZZER_INTERFACE_STREAM(initFunc, testFunc, moduleName); \ + MOZ_AFL_INTERFACE_STREAM(initFunc, testFunc, moduleName); + +} // namespace mozilla + +#endif // FuzzingInterfaceStream_h__ diff --git a/tools/fuzzing/interface/harness/FuzzerRunner.cpp b/tools/fuzzing/interface/harness/FuzzerRunner.cpp new file mode 100644 index 0000000000..fc2094aa59 --- /dev/null +++ b/tools/fuzzing/interface/harness/FuzzerRunner.cpp @@ -0,0 +1,91 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * * This Source Code Form is subject to the terms of the Mozilla Public + * * License, v. 2.0. If a copy of the MPL was not distributed with this + * * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <cstdlib> + +#include "FuzzerRunner.h" +#include "mozilla/Attributes.h" +#include "prenv.h" + +#include "FuzzerTestHarness.h" + +namespace mozilla { + +// We use a static var 'fuzzerRunner' defined in nsAppRunner.cpp. +// fuzzerRunner is initialized to nullptr but if this file is linked in, +// then fuzzerRunner will be set here indicating that +// we want to call into either LibFuzzer's main or the AFL entrypoint. +class _InitFuzzer { + public: + _InitFuzzer() { fuzzerRunner = new FuzzerRunner(); } + void InitXPCOM() { mScopedXPCOM = new ScopedXPCOM("Fuzzer"); } + void DeinitXPCOM() { + if (mScopedXPCOM) delete mScopedXPCOM; + mScopedXPCOM = nullptr; + } + + private: + ScopedXPCOM* mScopedXPCOM; +} InitLibFuzzer; + +static void DeinitXPCOM() { InitLibFuzzer.DeinitXPCOM(); } + +int FuzzerRunner::Run(int* argc, char*** argv) { + /* + * libFuzzer uses exit() calls in several places instead of returning, + * so the destructor of ScopedXPCOM is not called in some cases. + * For fuzzing, this does not make a difference, but in debug builds + * when running a single testcase, this causes an assertion when destroying + * global linked lists. For this reason, we allocate ScopedXPCOM on the heap + * using the global InitLibFuzzer class, combined with an atexit call to + * destroy the ScopedXPCOM instance again. + */ + InitLibFuzzer.InitXPCOM(); + std::atexit(DeinitXPCOM); + + const char* fuzzerEnv = getenv("FUZZER"); + + if (!fuzzerEnv) { + fprintf(stderr, + "Must specify fuzzing target in FUZZER environment variable\n"); + exit(1); + } + + std::string moduleNameStr(fuzzerEnv); + FuzzerFunctions funcs = + FuzzerRegistry::getInstance().getModuleFunctions(moduleNameStr); + FuzzerInitFunc initFunc = funcs.first; + FuzzerTestingFunc testingFunc = funcs.second; + if (initFunc) { + int ret = initFunc(argc, argv); + if (ret) { + fprintf(stderr, "Fuzzing Interface: Error: Initialize callback failed\n"); + exit(1); + } + } + + if (!testingFunc) { + fprintf(stderr, "Fuzzing Interface: Error: No testing callback found\n"); + exit(1); + } + +#ifdef LIBFUZZER + int ret = mFuzzerDriver(argc, argv, testingFunc); +#else + // For AFL, testingFunc points to the entry function we need. + int ret = testingFunc(NULL, 0); +#endif + + InitLibFuzzer.DeinitXPCOM(); + return ret; +} + +#ifdef LIBFUZZER +void FuzzerRunner::setParams(LibFuzzerDriver aDriver) { + mFuzzerDriver = aDriver; +} +#endif + +} // namespace mozilla diff --git a/tools/fuzzing/interface/harness/FuzzerRunner.h b/tools/fuzzing/interface/harness/FuzzerRunner.h new file mode 100644 index 0000000000..6b19e751cd --- /dev/null +++ b/tools/fuzzing/interface/harness/FuzzerRunner.h @@ -0,0 +1,24 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * * This Source Code Form is subject to the terms of the Mozilla Public + * * License, v. 2.0. If a copy of the MPL was not distributed with this + * * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FuzzerRegistry.h" + +namespace mozilla { + +class FuzzerRunner { + public: + int Run(int* argc, char*** argv); + +#ifdef LIBFUZZER + void setParams(LibFuzzerDriver aDriver); + + private: + LibFuzzerDriver mFuzzerDriver; +#endif +}; + +extern FuzzerRunner* fuzzerRunner; + +} // namespace mozilla diff --git a/tools/fuzzing/interface/harness/FuzzerTestHarness.h b/tools/fuzzing/interface/harness/FuzzerTestHarness.h new file mode 100644 index 0000000000..d7bb1064cf --- /dev/null +++ b/tools/fuzzing/interface/harness/FuzzerTestHarness.h @@ -0,0 +1,256 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Test harness for XPCOM objects, providing a scoped XPCOM initializer, + * nsCOMPtr, nsRefPtr, do_CreateInstance, do_GetService, ns(Auto|C|)String, + * and stdio.h/stdlib.h. + */ + +#ifndef FuzzerTestHarness_h__ +#define FuzzerTestHarness_h__ + +#include "mozilla/ArrayUtils.h" +#include "mozilla/Attributes.h" + +#include "prenv.h" +#include "nsComponentManagerUtils.h" +#include "nsServiceManagerUtils.h" +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsAppDirectoryServiceDefs.h" +#include "nsDirectoryServiceDefs.h" +#include "nsDirectoryServiceUtils.h" +#include "nsIDirectoryService.h" +#include "nsIFile.h" +#include "nsIObserverService.h" +#include "nsIServiceManager.h" +#include "nsXULAppAPI.h" +#include "mozilla/AppShutdown.h" +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> + +namespace { + +static uint32_t gFailCount = 0; + +/** + * Prints the given failure message and arguments using printf, prepending + * "TEST-UNEXPECTED-FAIL " for the benefit of the test harness and + * appending "\n" to eliminate having to type it at each call site. + */ +MOZ_FORMAT_PRINTF(1, 2) void fail(const char* msg, ...) { + va_list ap; + + printf("TEST-UNEXPECTED-FAIL | "); + + va_start(ap, msg); + vprintf(msg, ap); + va_end(ap); + + putchar('\n'); + ++gFailCount; +} + +//----------------------------------------------------------------------------- + +class ScopedXPCOM final : public nsIDirectoryServiceProvider2 { + public: + NS_DECL_ISUPPORTS + + explicit ScopedXPCOM(const char* testName, + nsIDirectoryServiceProvider* dirSvcProvider = nullptr) + : mDirSvcProvider(dirSvcProvider) { + mTestName = testName; + printf("Running %s tests...\n", mTestName); + + nsresult rv = NS_InitXPCOM(&mServMgr, nullptr, this); + if (NS_FAILED(rv)) { + fail("NS_InitXPCOM returned failure code 0x%" PRIx32, + static_cast<uint32_t>(rv)); + mServMgr = nullptr; + return; + } + } + + ~ScopedXPCOM() { + // If we created a profile directory, we need to remove it. + if (mProfD) { + mozilla::AppShutdown::AdvanceShutdownPhase( + mozilla::ShutdownPhase::AppShutdownNetTeardown); + mozilla::AppShutdown::AdvanceShutdownPhase( + mozilla::ShutdownPhase::AppShutdownTeardown); + mozilla::AppShutdown::AdvanceShutdownPhase( + mozilla::ShutdownPhase::AppShutdown); + mozilla::AppShutdown::AdvanceShutdownPhase( + mozilla::ShutdownPhase::AppShutdownQM); + mozilla::AppShutdown::AdvanceShutdownPhase( + mozilla::ShutdownPhase::AppShutdownTelemetry); + + if (NS_FAILED(mProfD->Remove(true))) { + NS_WARNING("Problem removing profile directory"); + } + + mProfD = nullptr; + } + + if (mServMgr) { + NS_RELEASE(mServMgr); + nsresult rv = NS_ShutdownXPCOM(nullptr); + if (NS_FAILED(rv)) { + fail("XPCOM shutdown failed with code 0x%" PRIx32, + static_cast<uint32_t>(rv)); + exit(1); + } + } + + printf("Finished running %s tests.\n", mTestName); + } + + already_AddRefed<nsIFile> GetProfileDirectory() { + if (mProfD) { + nsCOMPtr<nsIFile> copy = mProfD; + return copy.forget(); + } + + // Create a unique temporary folder to use for this test. + // Note that runcppunittests.py will run tests with a temp + // directory as the cwd, so just put something under that. + nsCOMPtr<nsIFile> profD; + nsresult rv = NS_GetSpecialDirectory(NS_OS_CURRENT_PROCESS_DIR, + getter_AddRefs(profD)); + NS_ENSURE_SUCCESS(rv, nullptr); + + rv = profD->Append(u"cpp-unit-profd"_ns); + NS_ENSURE_SUCCESS(rv, nullptr); + + rv = profD->CreateUnique(nsIFile::DIRECTORY_TYPE, 0755); + NS_ENSURE_SUCCESS(rv, nullptr); + + mProfD = profD; + return profD.forget(); + } + + already_AddRefed<nsIFile> GetGREDirectory() { + if (mGRED) { + nsCOMPtr<nsIFile> copy = mGRED; + return copy.forget(); + } + + char* env = PR_GetEnv("MOZ_XRE_DIR"); + nsCOMPtr<nsIFile> greD; + if (env) { + NS_NewLocalFile(NS_ConvertUTF8toUTF16(env), false, getter_AddRefs(greD)); + } + + mGRED = greD; + return greD.forget(); + } + + already_AddRefed<nsIFile> GetGREBinDirectory() { + if (mGREBinD) { + nsCOMPtr<nsIFile> copy = mGREBinD; + return copy.forget(); + } + + nsCOMPtr<nsIFile> greD = GetGREDirectory(); + if (!greD) { + return greD.forget(); + } + greD->Clone(getter_AddRefs(mGREBinD)); + +#ifdef XP_MACOSX + nsAutoCString leafName; + mGREBinD->GetNativeLeafName(leafName); + if (leafName.EqualsLiteral("Resources")) { + mGREBinD->SetNativeLeafName("MacOS"_ns); + } +#endif + + nsCOMPtr<nsIFile> copy = mGREBinD; + return copy.forget(); + } + + //////////////////////////////////////////////////////////////////////////// + //// nsIDirectoryServiceProvider + + NS_IMETHODIMP GetFile(const char* aProperty, bool* _persistent, + nsIFile** _result) override { + // If we were supplied a directory service provider, ask it first. + if (mDirSvcProvider && NS_SUCCEEDED(mDirSvcProvider->GetFile( + aProperty, _persistent, _result))) { + return NS_OK; + } + + // Otherwise, the test harness provides some directories automatically. + if (0 == strcmp(aProperty, NS_APP_USER_PROFILE_50_DIR) || + 0 == strcmp(aProperty, NS_APP_USER_PROFILE_LOCAL_50_DIR) || + 0 == strcmp(aProperty, NS_APP_PROFILE_LOCAL_DIR_STARTUP)) { + nsCOMPtr<nsIFile> profD = GetProfileDirectory(); + NS_ENSURE_TRUE(profD, NS_ERROR_FAILURE); + + nsCOMPtr<nsIFile> clone; + nsresult rv = profD->Clone(getter_AddRefs(clone)); + NS_ENSURE_SUCCESS(rv, rv); + + *_persistent = true; + clone.forget(_result); + return NS_OK; + } else if (0 == strcmp(aProperty, NS_GRE_DIR)) { + nsCOMPtr<nsIFile> greD = GetGREDirectory(); + NS_ENSURE_TRUE(greD, NS_ERROR_FAILURE); + + *_persistent = true; + greD.forget(_result); + return NS_OK; + } else if (0 == strcmp(aProperty, NS_GRE_BIN_DIR)) { + nsCOMPtr<nsIFile> greBinD = GetGREBinDirectory(); + NS_ENSURE_TRUE(greBinD, NS_ERROR_FAILURE); + + *_persistent = true; + greBinD.forget(_result); + return NS_OK; + } + + return NS_ERROR_FAILURE; + } + + //////////////////////////////////////////////////////////////////////////// + //// nsIDirectoryServiceProvider2 + + NS_IMETHODIMP GetFiles(const char* aProperty, + nsISimpleEnumerator** _enum) override { + // If we were supplied a directory service provider, ask it first. + nsCOMPtr<nsIDirectoryServiceProvider2> provider = + do_QueryInterface(mDirSvcProvider); + if (provider && NS_SUCCEEDED(provider->GetFiles(aProperty, _enum))) { + return NS_OK; + } + + return NS_ERROR_FAILURE; + } + + private: + const char* mTestName; + nsIServiceManager* mServMgr; + nsCOMPtr<nsIDirectoryServiceProvider> mDirSvcProvider; + nsCOMPtr<nsIFile> mProfD; + nsCOMPtr<nsIFile> mGRED; + nsCOMPtr<nsIFile> mGREBinD; +}; + +NS_IMPL_QUERY_INTERFACE(ScopedXPCOM, nsIDirectoryServiceProvider, + nsIDirectoryServiceProvider2) + +NS_IMETHODIMP_(MozExternalRefCountType) +ScopedXPCOM::AddRef() { return 2; } + +NS_IMETHODIMP_(MozExternalRefCountType) +ScopedXPCOM::Release() { return 1; } + +} // namespace + +#endif // FuzzerTestHarness_h__ diff --git a/tools/fuzzing/interface/harness/moz.build b/tools/fuzzing/interface/harness/moz.build new file mode 100644 index 0000000000..0eff84c8aa --- /dev/null +++ b/tools/fuzzing/interface/harness/moz.build @@ -0,0 +1,16 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Library("fuzzer-runner") + +SOURCES += [ + "FuzzerRunner.cpp", +] +EXPORTS += [ + "FuzzerRunner.h", +] + +FINAL_LIBRARY = "xul" diff --git a/tools/fuzzing/interface/moz.build b/tools/fuzzing/interface/moz.build new file mode 100644 index 0000000000..8a51007174 --- /dev/null +++ b/tools/fuzzing/interface/moz.build @@ -0,0 +1,32 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Library("fuzzer-interface") + +EXPORTS += [ + "FuzzingInterface.h", +] + +SOURCES += [ + "FuzzingInterface.cpp", +] + +if CONFIG["JS_STANDALONE"]: + FINAL_LIBRARY = "js" +else: + EXPORTS += [ + "FuzzingInterfaceStream.h", + ] + + SOURCES += [ + "FuzzingInterfaceStream.cpp", + ] + + DIRS += [ + "harness", + ] + + FINAL_LIBRARY = "xul-gtest" diff --git a/tools/fuzzing/ipc/IPCFuzzController.cpp b/tools/fuzzing/ipc/IPCFuzzController.cpp new file mode 100644 index 0000000000..649b7f780f --- /dev/null +++ b/tools/fuzzing/ipc/IPCFuzzController.cpp @@ -0,0 +1,848 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "IPCFuzzController.h" +#include "mozilla/Fuzzing.h" +#include "mozilla/SpinEventLoopUntil.h" +#include "mozilla/SyncRunnable.h" + +#include "nsIThread.h" +#include "nsThreadUtils.h" + +#include "mozilla/ipc/MessageChannel.h" +#include "mozilla/ipc/MessageLink.h" +#include "mozilla/ipc/ProtocolUtils.h" +#include "mozilla/ipc/NodeChannel.h" +#include "mozilla/ipc/NodeController.h" + +#include "mozilla/ipc/PIdleScheduler.h" +#include "mozilla/ipc/PBackground.h" +#include "mozilla/dom/PContent.h" + +using namespace mojo::core::ports; +using namespace mozilla::ipc; + +// Sync inject means that the actual fuzzing takes place on the I/O thread +// and hence it injects directly into the target NodeChannel. In async mode, +// we run the fuzzing on a separate thread and dispatch the runnable that +// injects the message back to the I/O thread. Both approaches seem to work +// and have advantages and disadvantages. Blocking the I/O thread means no +// IPC between other processes will interfere with our fuzzing in the meantime +// but blocking could also cause hangs when such IPC is required during the +// fuzzing runtime for some reason. +// #define MOZ_FUZZ_IPC_SYNC_INJECT 1 + +// For debugging purposes, it can be helpful to synchronize after each message +// rather than after each iteration, to see which messages are particularly +// slow or cause a hang. Without this, synchronization will occur at the end +// of each iteration as well as after each constructor message. +// #define MOZ_FUZZ_IPC_SYNC_AFTER_EACH_MSG + +namespace mozilla { +namespace fuzzing { + +IPCFuzzController::IPCFuzzController() : mMutex("IPCFuzzController") { + InitializeIPCTypes(); + + // We use 6 bits for port index selection without wrapping, so we just + // create 64 empty rows in our port matrix. Not all of these rows will + // be used though. + portNames.resize(64); + + // This is our port / toplevel actor ordering. Add new toplevel actors + // here to support them in the fuzzer. Do *NOT* change the order of + // these, as it will invalidate our fuzzing corpus. + portNameToIndex["PContent"] = 0; + portNameToIndex["PBackground"] = 1; + portNameToIndex["PBackgroundStarter"] = 2; + portNameToIndex["PCompositorManager"] = 3; + portNameToIndex["PImageBridge"] = 4; + portNameToIndex["PProcessHangMonitor"] = 5; + portNameToIndex["PProfiler"] = 6; + portNameToIndex["PVRManager"] = 7; + portNameToIndex["PCanvasManager"] = 8; +} + +// static +IPCFuzzController& IPCFuzzController::instance() { + static IPCFuzzController ifc; + return ifc; +} + +void IPCFuzzController::InitializeIPCTypes() { + const char* cons = "Constructor"; + size_t cons_len = strlen(cons); + + for (uint32_t start = 0; start < LastMsgIndex; ++start) { + uint32_t i; + for (i = (start << 16) + 1; i < ((start + 1) << 16); ++i) { + const char* name = IPC::StringFromIPCMessageType(i); + + if (name[0] == '<') break; + + size_t len = strlen(name); + if (len > cons_len && !memcmp(cons, name + len - cons_len, cons_len)) { + constructorTypes.insert(i); + } + } + + validMsgTypes[(ProtocolId)start] = i - ((start << 16) + 1); + } +} + +bool IPCFuzzController::GetRandomIPCMessageType(ProtocolId pId, + uint16_t typeOffset, + uint32_t* type) { + auto pIdEntry = validMsgTypes.find(pId); + if (pIdEntry == validMsgTypes.end()) { + return false; + } + + *type = + ((uint32_t)pIdEntry->first << 16) + 1 + (typeOffset % pIdEntry->second); + return true; +} + +void IPCFuzzController::OnActorConnected(IProtocol* protocol) { + if (!XRE_IsParentProcess()) { + return; + } + +#ifdef FUZZ_DEBUG + MOZ_FUZZING_NYX_PRINTF("INFO: [OnActorConnected] ActorID %d Protocol: %s\n", + protocol->Id(), protocol->GetProtocolName()); +#endif + + MessageChannel* channel = protocol->ToplevelProtocol()->GetIPCChannel(); + + Maybe<PortName> portName = channel->GetPortName(); + if (portName) { + MOZ_FUZZING_NYX_DEBUG( + "DEBUG: IPCFuzzController::OnActorConnected() Mutex try\n"); + // Called on background threads and modifies `actorIds`. + MutexAutoLock lock(mMutex); + MOZ_FUZZING_NYX_DEBUG( + "DEBUG: IPCFuzzController::OnActorConnected() Mutex locked\n"); + actorIds[*portName].emplace_back(protocol->Id(), protocol->GetProtocolId()); + + // Fix the port we will be using for at least the next 5 messages + useLastPortName = true; + lastActorPortName = *portName; + + // Use this actor for the next 5 messages + useLastActor = 5; + } else { + MOZ_FUZZING_NYX_PRINT("WARNING: No port name on actor?!\n"); + } +} + +void IPCFuzzController::OnActorDestroyed(IProtocol* protocol) { + if (!XRE_IsParentProcess()) { + return; + } + + MOZ_FUZZING_NYX_PRINTF("INFO: [OnActorDestroyed] ActorID %d Protocol: %s\n", + protocol->Id(), protocol->GetProtocolName()); + + MessageChannel* channel = protocol->ToplevelProtocol()->GetIPCChannel(); + + Maybe<PortName> portName = channel->GetPortName(); + if (portName) { + MOZ_FUZZING_NYX_DEBUG( + "DEBUG: IPCFuzzController::OnActorDestroyed() Mutex try\n"); + // Called on background threads and modifies `actorIds`. + MutexAutoLock lock(mMutex); + MOZ_FUZZING_NYX_DEBUG( + "DEBUG: IPCFuzzController::OnActorDestroyed() Mutex locked\n"); + + for (auto iter = actorIds[*portName].begin(); + iter != actorIds[*portName].end();) { + if (iter->first == protocol->Id() && + iter->second == protocol->GetProtocolId()) { + iter = actorIds[*portName].erase(iter); + } else { + ++iter; + } + } + } else { + MOZ_FUZZING_NYX_PRINT("WARNING: No port name on destroyed actor?!\n"); + } +} + +void IPCFuzzController::AddToplevelActor(PortName name, ProtocolId protocolId) { + const char* protocolName = ProtocolIdToName(protocolId); + auto result = portNameToIndex.find(protocolName); + if (result == portNameToIndex.end()) { + MOZ_FUZZING_NYX_PRINTF( + "ERROR: [OnActorConnected] Unknown Top-Level Protocol: %s\n", + protocolName); + MOZ_FUZZING_NYX_ABORT("Unknown Top-Level Protocol\n"); + } + uint8_t portIndex = result->second; + portNames[portIndex].push_back(name); +} + +bool IPCFuzzController::ObserveIPCMessage(mozilla::ipc::NodeChannel* channel, + IPC::Message& aMessage) { + if (!mozilla::fuzzing::Nyx::instance().is_enabled("IPC_Generic")) { + // Fuzzer is not enabled. + return true; + } + + if (!XRE_IsParentProcess()) { + // For now we only care about things in the parent process. + return true; + } + + if (aMessage.IsFuzzMsg()) { + // Don't observe our own messages. If this is the first fuzzing message, + // we also block further non-fuzzing communication on that node. + if (!channel->mBlockSendRecv) { + MOZ_FUZZING_NYX_PRINTF( + "INFO: [NodeChannel::OnMessageReceived] Blocking further " + "communication on Port %lu %lu (seen fuzz msg)\n", + channel->GetName().v1, channel->GetName().v2); + channel->mBlockSendRecv = true; + } + return true; + } else if (aMessage.type() == dom::PContent::Msg_SignalFuzzingReady__ID) { + MOZ_FUZZING_NYX_PRINT("DEBUG: Ready message detected.\n"); + + // TODO: This is specific to PContent fuzzing. If we later want to fuzz + // a different process pair, we need additional signals here. + OnChildReady(); + + // The ready message indicates the right node name for us to work with + // and we should only ever receive it once. + if (haveTargetNodeName) { + MOZ_FUZZING_NYX_PRINT("ERROR: Received ready signal twice?!\n"); + MOZ_REALLY_CRASH(__LINE__); + } + + targetNodeName = channel->GetName(); + haveTargetNodeName = true; + + // We can also use this message as the base template for other messages + if (!this->sampleHeader.initLengthUninitialized( + sizeof(IPC::Message::Header))) { + MOZ_FUZZING_NYX_ABORT("sampleHeader.initLengthUninitialized failed\n"); + } + + memcpy(sampleHeader.begin(), aMessage.header(), + sizeof(IPC::Message::Header)); + } else if (haveTargetNodeName && targetNodeName != channel->GetName()) { + // Not our node, no need to observe + return true; + } else if (Nyx::instance().started()) { + // When fuzzing is already started, we shouldn't observe messages anymore. + if (!channel->mBlockSendRecv) { + MOZ_FUZZING_NYX_PRINTF( + "INFO: [NodeChannel::OnMessageReceived] Blocking further " + "communication on Port %lu %lu (fuzzing started)\n", + channel->GetName().v1, channel->GetName().v2); + channel->mBlockSendRecv = true; + } + return false; + } + + Vector<char, 256, InfallibleAllocPolicy> footer; + + if (!footer.initLengthUninitialized(aMessage.event_footer_size())) { + MOZ_FUZZING_NYX_ABORT("footer.initLengthUninitialized failed\n"); + } + + if (!aMessage.ReadFooter(footer.begin(), footer.length(), false)) { + MOZ_FUZZING_NYX_ABORT("ERROR: ReadFooter() failed?!\n"); + } + + UniquePtr<Event> event = Event::Deserialize(footer.begin(), footer.length()); + + if (!event) { + MOZ_FUZZING_NYX_ABORT("ERROR: Failed to deserialize observed message?!\n"); + } + + if (event->type() == Event::kUserMessage) { + if (haveTargetNodeName && !fuzzingStartPending) { + bool missingActor = false; + + // Check if we have any entries in our port map that we haven't seen yet + // though `OnActorConnected`. That method is called on a background + // thread and this call will race with the I/O thread. + { + MOZ_FUZZING_NYX_DEBUG( + "DEBUG: IPCFuzzController::ObserveIPCMessage() Mutex try\n"); + // Called on the I/O thread and reads `portSeqNos`. + // + // IMPORTANT: We must give up any locks before entering `StartFuzzing`, + // as we will never return. This would cause a deadlock with new actors + // being created and `OnActorConnected` being called. + MutexAutoLock lock(mMutex); + + MOZ_FUZZING_NYX_DEBUG( + "DEBUG: IPCFuzzController::ObserveIPCMessage() Mutex locked\n"); + + for (auto iter = portSeqNos.begin(); iter != portSeqNos.end(); ++iter) { + auto result = actorIds.find(iter->first); + if (result == actorIds.end()) { + // Make sure we only wait for actors that belong to us. + auto result = portNodeName.find(iter->first); + if (result->second == targetNodeName) { + missingActor = true; + break; + } + } + } + } + + if (missingActor) { + MOZ_FUZZING_NYX_PRINT( + "INFO: Delaying fuzzing start, missing actors...\n"); + } else if (!childReady) { + MOZ_FUZZING_NYX_PRINT( + "INFO: Delaying fuzzing start, waiting for child...\n"); + } else { + fuzzingStartPending = true; + StartFuzzing(channel, aMessage); + + // In the async case, we return and can already block the relevant + // communication. + if (targetNodeName == channel->GetName()) { + if (!channel->mBlockSendRecv) { + MOZ_FUZZING_NYX_PRINTF( + "INFO: [NodeChannel::OnMessageReceived] Blocking further " + "communication on Port %lu %lu (fuzzing start pending)\n", + channel->GetName().v1, channel->GetName().v2); + channel->mBlockSendRecv = true; + } + + return false; + } + return true; + } + } + + // Add/update sequence numbers. We need to make sure to do this after our + // call to `StartFuzzing` because once we start fuzzing, the message will + // never actually be processed, so we run into a sequence number desync. + { + // Get the port name associated with this message + UserMessageEvent* userMsgEv = static_cast<UserMessageEvent*>(event.get()); + PortName name = event->port_name(); + + // Called on the I/O thread and modifies `portSeqNos`. + MutexAutoLock lock(mMutex); + portSeqNos.insert_or_assign( + name, std::pair<int32_t, uint64_t>(aMessage.seqno(), + userMsgEv->sequence_num())); + + portNodeName.insert_or_assign(name, channel->GetName()); + } + } + + return true; +} + +bool IPCFuzzController::MakeTargetDecision( + uint8_t portIndex, uint8_t portInstanceIndex, uint8_t actorIndex, + uint16_t typeOffset, PortName* name, int32_t* seqno, uint64_t* fseqno, + int32_t* actorId, uint32_t* type, bool* is_cons, bool update) { + // Every possible toplevel actor type has a fixed number that + // we assign to it in the constructor of this class. Here, we + // use the lower 6 bits to select this toplevel actor type. + // This approach has the advantage that the tests will always + // select the same toplevel actor type deterministically, + // independent of the order they appeared and independent + // of the type of fuzzing we are doing. + auto portInstances = portNames[portIndex & 0x3f]; + if (!portInstances.size()) { + return false; + } + + if (useLastActor) { + useLastActor--; + *name = lastActorPortName; + + MOZ_FUZZING_NYX_PRINT("DEBUG: MakeTargetDecision: Pinned to last actor.\n"); + + // Once we stop pinning to the last actor, we need to decide if we + // want to keep the pinning on the port itself. We use one of the + // unused upper bits of portIndex for this purpose. + if (!useLastActor && (portIndex & (1 << 7))) { + MOZ_FUZZING_NYX_PRINT( + "DEBUG: MakeTargetDecision: Released pinning on last port.\n"); + useLastPortName = false; + } + } else if (useLastPortName) { + *name = lastActorPortName; + MOZ_FUZZING_NYX_PRINT("DEBUG: MakeTargetDecision: Pinned to last port.\n"); + } else { + *name = portInstances[portInstanceIndex % portInstances.size()]; + } + + // We should always have at least one actor per port + auto result = actorIds.find(*name); + if (result == actorIds.end()) { + MOZ_FUZZING_NYX_PRINT("ERROR: Couldn't find port in actors map?!\n"); + return false; + } + + // Find a random actor on this port + auto actors = result->second; + if (actors.empty()) { + MOZ_FUZZING_NYX_PRINT( + "ERROR: Couldn't find an actor for selected port?!\n"); + return false; + } + + auto seqNos = portSeqNos[*name]; + + // Hand out the correct sequence numbers + *seqno = seqNos.first - 1; + *fseqno = seqNos.second + 1; + + if (update) { + portSeqNos.insert_or_assign(*name, + std::pair<int32_t, uint64_t>(*seqno, *fseqno)); + } + + if (useLastActor) { + actorIndex = actors.size() - 1; + } else { + actorIndex %= actors.size(); + } + + ActorIdPair ids = actors[actorIndex]; + *actorId = ids.first; + + // If the actor ID is 0, then we are talking to the toplevel actor + // of this port. Hence we must set the ID to MSG_ROUTING_CONTROL. + if (!*actorId) { + *actorId = MSG_ROUTING_CONTROL; + } + + if (!this->GetRandomIPCMessageType(ids.second, typeOffset, type)) { + MOZ_FUZZING_NYX_PRINT("ERROR: GetRandomIPCMessageType failed?!\n"); + return false; + } + + *is_cons = false; + if (constructorTypes.find(*type) != constructorTypes.end()) { + *is_cons = true; + } + +#ifdef FUZZ_DEBUG + MOZ_FUZZING_NYX_PRINTF( + "DEBUG: MakeTargetDecision: Protocol: %s msgType: %s\n", + ProtocolIdToName(ids.second), IPC::StringFromIPCMessageType(*type)); +#endif + + return true; +} + +void IPCFuzzController::OnMessageTaskStart() { messageStartCount++; } + +void IPCFuzzController::OnMessageTaskStop() { messageStopCount++; } + +void IPCFuzzController::OnPreFuzzMessageTaskRun() { messageTaskCount++; } +void IPCFuzzController::OnPreFuzzMessageTaskStop() { messageTaskCount--; } + +void IPCFuzzController::OnDropPeer(const char* reason = nullptr, + const char* file = nullptr, int line = 0) { + if (!XRE_IsParentProcess()) { + return; + } + + if (!Nyx::instance().started()) { + // It's possible to close a connection to some peer before we have even + // started fuzzing. We ignore these events until we are actually fuzzing. + return; + } + + MOZ_FUZZING_NYX_PRINT( + "ERROR: ======== END OF ITERATION (DROP_PEER) ========\n"); +#ifdef FUZZ_DEBUG + MOZ_FUZZING_NYX_PRINTF("DEBUG: ======== %s:%d ========\n", file, line); +#endif + Nyx::instance().handle_event("MOZ_IPC_DROP_PEER", file, line, reason); + + if (Nyx::instance().is_replay()) { + // In replay mode, let's ignore drop peer to avoid races with it. + return; + } + + Nyx::instance().release(IPCFuzzController::instance().getMessageStopCount()); +} + +void IPCFuzzController::StartFuzzing(mozilla::ipc::NodeChannel* channel, + IPC::Message& aMessage) { + nodeChannel = channel; + + RefPtr<IPCFuzzLoop> runnable = new IPCFuzzLoop(); + +#if MOZ_FUZZ_IPC_SYNC_INJECT + runnable->Run(); +#else + nsCOMPtr<nsIThread> newThread; + nsresult rv = + NS_NewNamedThread("IPCFuzzLoop", getter_AddRefs(newThread), runnable); + + if (NS_FAILED(rv)) { + MOZ_FUZZING_NYX_ABORT("ERROR: [StartFuzzing] NS_NewNamedThread failed?!\n"); + } +#endif +} + +IPCFuzzController::IPCFuzzLoop::IPCFuzzLoop() + : mozilla::Runnable("IPCFuzzLoop") {} + +NS_IMETHODIMP IPCFuzzController::IPCFuzzLoop::Run() { + MOZ_FUZZING_NYX_DEBUG("DEBUG: BEGIN IPCFuzzLoop::Run()\n"); + + const size_t maxMsgSize = 2048; + const size_t controlLen = 16; + + Vector<char, 256, InfallibleAllocPolicy> buffer; + + RefPtr<NodeController> controller = NodeController::GetSingleton(); + + // TODO: The following code is full of data races. We need synchronization + // on the `IPCFuzzController` instance, because the I/O thread can call into + // this class via ObserveIPCMessages. The problem is that any such call + // must either be observed to update the sequence numbers, or the packet + // must be dropped already. + if (!IPCFuzzController::instance().haveTargetNodeName) { + MOZ_FUZZING_NYX_ABORT("ERROR: I don't have the target NodeName?!\n"); + } + + { + MOZ_FUZZING_NYX_DEBUG("DEBUG: IPCFuzzLoop::Run() Mutex try\n"); + // Called on the I/O thread and modifies `portSeqNos` and `actorIds`. + MutexAutoLock lock(IPCFuzzController::instance().mMutex); + MOZ_FUZZING_NYX_DEBUG("DEBUG: IPCFuzzLoop::Run() Mutex locked\n"); + + // The wait/delay logic in ObserveIPCMessage should ensure that we haven't + // seen any packets on ports for which we haven't received actor information + // yet, if those ports belong to our channel. However, we might also have + // seen ports not belonging to our channel, which we have to remove now. + for (auto iter = IPCFuzzController::instance().portSeqNos.begin(); + iter != IPCFuzzController::instance().portSeqNos.end();) { + auto result = IPCFuzzController::instance().actorIds.find(iter->first); + if (result == IPCFuzzController::instance().actorIds.end()) { + auto portNameResult = + IPCFuzzController::instance().portNodeName.find(iter->first); + if (portNameResult->second == + IPCFuzzController::instance().targetNodeName) { + MOZ_FUZZING_NYX_PRINT( + "ERROR: We should not have port map entries without a " + "corresponding " + "entry in our actors map\n"); + MOZ_REALLY_CRASH(__LINE__); + } else { + iter = IPCFuzzController::instance().portSeqNos.erase(iter); + } + } else { + ++iter; + } + } + + // TODO: Technically, at this point we only know that PContent (or whatever + // toplevel protocol we decided to synchronize on), is present. It might + // be possible that others aren't created yet and we are racing on this. + // + // Note: The delay logic mentioned above makes this less likely. Only actors + // which are created on-demand and which have not been referenced yet at all + // would be affected by such a race. + for (auto iter = IPCFuzzController::instance().actorIds.begin(); + iter != IPCFuzzController::instance().actorIds.end(); ++iter) { + bool isValidTarget = false; + Maybe<PortStatus> status; + PortRef ref = controller->GetPort(iter->first); + if (ref.is_valid()) { + status = controller->GetStatus(ref); + if (status) { + isValidTarget = status->peer_node_name == + IPCFuzzController::instance().targetNodeName; + } + } + + auto result = IPCFuzzController::instance().portSeqNos.find(iter->first); + if (result == IPCFuzzController::instance().portSeqNos.end()) { + if (isValidTarget) { + MOZ_FUZZING_NYX_PRINTF( + "INFO: Using Port %lu %lu for protocol %s (*)\n", iter->first.v1, + iter->first.v2, ProtocolIdToName(iter->second[0].second)); + + // Normally the start sequence numbers would be -1 and 1, but our map + // does not record the next numbers, but the "last seen" state. So we + // have to adjust these so the next calculated sequence number pair + // matches the start sequence numbers. + IPCFuzzController::instance().portSeqNos.insert_or_assign( + iter->first, std::pair<int32_t, uint64_t>(0, 0)); + + IPCFuzzController::instance().AddToplevelActor( + iter->first, iter->second[0].second); + + } else { + MOZ_FUZZING_NYX_PRINTF( + "INFO: Removing Port %lu %lu for protocol %s (*)\n", + iter->first.v1, iter->first.v2, + ProtocolIdToName(iter->second[0].second)); + + // This toplevel actor does not belong to us, but we haven't added + // it to `portSeqNos`, so we don't have to remove it. + } + } else { + if (isValidTarget) { + MOZ_FUZZING_NYX_PRINTF("INFO: Using Port %lu %lu for protocol %s\n", + iter->first.v1, iter->first.v2, + ProtocolIdToName(iter->second[0].second)); + + IPCFuzzController::instance().AddToplevelActor( + iter->first, iter->second[0].second); + } else { + MOZ_FUZZING_NYX_PRINTF( + "INFO: Removing Port %lu %lu for protocol %s\n", iter->first.v1, + iter->first.v2, ProtocolIdToName(iter->second[0].second)); + + // This toplevel actor does not belong to us, so remove it. + IPCFuzzController::instance().portSeqNos.erase(result); + } + } + } + } + + IPCFuzzController::instance().runnableDone = false; + + SyncRunnable::DispatchToThread( + GetMainThreadSerialEventTarget(), + NS_NewRunnableFunction("IPCFuzzController::StartFuzzing", [&]() -> void { + MOZ_FUZZING_NYX_PRINT("INFO: Main thread runnable start.\n"); + NS_ProcessPendingEvents(NS_GetCurrentThread()); + MOZ_FUZZING_NYX_PRINT("INFO: Main thread runnable done.\n"); + })); + + MOZ_FUZZING_NYX_PRINT("INFO: Performing snapshot...\n"); + Nyx::instance().start(); + + uint32_t expected_messages = 0; + + IPCFuzzController::instance().useLastActor = 0; + IPCFuzzController::instance().useLastPortName = false; + + if (!buffer.initLengthUninitialized(maxMsgSize)) { + MOZ_FUZZING_NYX_ABORT("ERROR: Failed to initialize buffer!\n"); + } + + for (int i = 0; i < 16; ++i) { + // Grab enough data to potentially fill our everything except the footer. + uint32_t bufsize = + Nyx::instance().get_data((uint8_t*)buffer.begin(), buffer.length()); + + if (bufsize == 0xFFFFFFFF) { + // Done constructing + MOZ_FUZZING_NYX_DEBUG("Iteration complete: Out of data.\n"); + break; + } + + // Payload must be int aligned + bufsize -= bufsize % 4; + + // Need at least a header and the control bytes. + if (bufsize < sizeof(IPC::Message::Header) + controlLen) { + MOZ_FUZZING_NYX_DEBUG("INFO: Not enough data to craft IPC message.\n"); + continue; + } + + buffer.shrinkTo(bufsize); + + const uint8_t* controlData = (uint8_t*)buffer.begin(); + + char* ipcMsgData = buffer.begin() + controlLen; + size_t ipcMsgLen = buffer.length() - controlLen; + + // Copy the header of the original message + memcpy(ipcMsgData, IPCFuzzController::instance().sampleHeader.begin(), + sizeof(IPC::Message::Header)); + + IPC::Message::Header* ipchdr = (IPC::Message::Header*)ipcMsgData; + + ipchdr->payload_size = ipcMsgLen - sizeof(IPC::Message::Header); + + PortName new_port_name; + int32_t new_seqno; + uint64_t new_fseqno; + + int32_t actorId; + uint32_t msgType; + bool isConstructor; + // Control Data Layout (16 byte) + // Byte 0 - Port Index (selects out of the valid ports seen) + // Byte 1 - Actor Index (selects one of the actors for that port) + // Byte 2 - Type Offset (select valid type for the specified actor) + // Byte 3 - ^- continued + // Byte 4 - Sync Bit + // Byte 5 - Optionally select a particular instance of the selected + // port type. Some toplevel protocols can have multiple + // instances running at the same time. + + uint8_t portIndex = controlData[0]; + uint8_t actorIndex = controlData[1]; + uint16_t typeOffset = *(uint16_t*)(&controlData[2]); + bool isSync = controlData[4] > 127; + uint8_t portInstanceIndex = controlData[5]; + + if (!IPCFuzzController::instance().MakeTargetDecision( + portIndex, portInstanceIndex, actorIndex, typeOffset, + &new_port_name, &new_seqno, &new_fseqno, &actorId, &msgType, + &isConstructor)) { + MOZ_FUZZING_NYX_DEBUG("DEBUG: MakeTargetDecision returned false.\n"); + continue; + } + + if (Nyx::instance().is_replay()) { + MOZ_FUZZING_NYX_PRINT("INFO: Replaying IPC packet with payload:\n"); + for (uint32_t i = 0; i < ipcMsgLen - sizeof(IPC::Message::Header); ++i) { + if (i % 16 == 0) { + MOZ_FUZZING_NYX_PRINT("\n "); + } + + MOZ_FUZZING_NYX_PRINTF( + "0x%02X ", + (unsigned char)(ipcMsgData[sizeof(IPC::Message::Header) + i])); + } + MOZ_FUZZING_NYX_PRINT("\n"); + } + + UniquePtr<IPC::Message> msg(new IPC::Message(ipcMsgData, ipcMsgLen)); + + if (isConstructor) { + MOZ_FUZZING_NYX_DEBUG("DEBUG: Sending constructor message...\n"); + msg->header()->flags.SetConstructor(); + } + + if (!isConstructor && isSync) { + MOZ_FUZZING_NYX_DEBUG("INFO: Sending sync message...\n"); + msg->header()->flags.SetSync(); + } + + msg->set_seqno(new_seqno); + msg->set_routing_id(actorId); + + // TODO: There is no setter for this. + msg->header()->type = msgType; + + // Create the footer + auto messageEvent = MakeUnique<UserMessageEvent>(0); + messageEvent->set_port_name(new_port_name); + messageEvent->set_sequence_num(new_fseqno); + + Vector<char, 256, InfallibleAllocPolicy> footerBuffer; + (void)footerBuffer.initLengthUninitialized( + messageEvent->GetSerializedSize()); + messageEvent->Serialize(footerBuffer.begin()); + + msg->WriteFooter(footerBuffer.begin(), footerBuffer.length()); + msg->set_event_footer_size(footerBuffer.length()); + + // This marks the message as a fuzzing message. Without this, it will + // be ignored by MessageTask and also not even scheduled by NodeChannel + // in asynchronous mode. We use this to ignore any IPC activity that + // happens just while we are fuzzing. + msg->SetFuzzMsg(); + +#ifdef FUZZ_DEBUG + MOZ_FUZZING_NYX_PRINTF( + "DEBUG: OnEventMessage iteration %d, EVS: %u Payload: %u.\n", i, + ipchdr->event_footer_size, ipchdr->payload_size); +#endif + +#ifdef FUZZ_DEBUG + MOZ_FUZZING_NYX_PRINTF("DEBUG: OnEventMessage: Port %lu %lu. Actor %d\n", + new_port_name.v1, new_port_name.v2, actorId); + MOZ_FUZZING_NYX_PRINTF( + "DEBUG: OnEventMessage: Flags: %u TxID: %d Handles: %u\n", + msg->header()->flags, msg->header()->txid, msg->header()->num_handles); +#endif + + // The number of messages we expect to see stopped. + expected_messages++; + +#if MOZ_FUZZ_IPC_SYNC_INJECT + // For synchronous injection, we just call OnMessageReceived directly. + IPCFuzzController::instance().nodeChannel->OnMessageReceived( + std::move(msg)); +#else + // For asynchronous injection, we have to post to the I/O thread instead. + XRE_GetIOMessageLoop()->PostTask(NS_NewRunnableFunction( + "NodeChannel::OnMessageReceived", + [msg = std::move(msg), + nodeChannel = + RefPtr{IPCFuzzController::instance().nodeChannel}]() mutable { + nodeChannel->OnMessageReceived(std::move(msg)); + })); +#endif + +#ifdef MOZ_FUZZ_IPC_SYNC_AFTER_EACH_MSG + MOZ_FUZZING_NYX_DEBUG("DEBUG: Synchronizing after message...\n"); + IPCFuzzController::instance().SynchronizeOnMessageExecution( + expected_messages); +#else + + if (isConstructor) { + MOZ_FUZZING_NYX_DEBUG( + "DEBUG: Synchronizing due to constructor message...\n"); + IPCFuzzController::instance().SynchronizeOnMessageExecution( + expected_messages); + } +#endif + } + + MOZ_FUZZING_NYX_DEBUG("DEBUG: Synchronizing due to end of iteration...\n"); + IPCFuzzController::instance().SynchronizeOnMessageExecution( + expected_messages); + + MOZ_FUZZING_NYX_DEBUG( + "DEBUG: ======== END OF ITERATION (RELEASE) ========\n"); + + Nyx::instance().release(IPCFuzzController::instance().getMessageStopCount()); + + // Never reached. + return NS_OK; +} + +void IPCFuzzController::SynchronizeOnMessageExecution( + uint32_t expected_messages) { + // This synchronization will work in both the sync and async case. + // For the async case, it is important to wait for the exact stop count + // because the message task is not even started potentially when we + // read this loop. + int hang_timeout = 10 * 1000; + while (IPCFuzzController::instance().getMessageStopCount() != + expected_messages) { +#ifdef FUZZ_DEBUG + uint32_t count_stopped = + IPCFuzzController::instance().getMessageStopCount(); + uint32_t count_live = IPCFuzzController::instance().getMessageStartCount(); + MOZ_FUZZING_NYX_PRINTF( + "DEBUG: Post Constructor: %d stopped messages (%d live, %d " + "expected)!\n", + count_stopped, count_live, expected_messages); +#endif + PR_Sleep(PR_MillisecondsToInterval(50)); + hang_timeout -= 50; + + if (hang_timeout <= 0) { + Nyx::instance().handle_event("MOZ_TIMEOUT", nullptr, 0, nullptr); + MOZ_FUZZING_NYX_PRINT( + "ERROR: ======== END OF ITERATION (TIMEOUT) ========\n"); + Nyx::instance().release( + IPCFuzzController::instance().getMessageStopCount()); + } + } +} + +} // namespace fuzzing +} // namespace mozilla diff --git a/tools/fuzzing/ipc/IPCFuzzController.h b/tools/fuzzing/ipc/IPCFuzzController.h new file mode 100644 index 0000000000..faf89d9f81 --- /dev/null +++ b/tools/fuzzing/ipc/IPCFuzzController.h @@ -0,0 +1,193 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_ipc_IPCFuzzController_h +#define mozilla_ipc_IPCFuzzController_h + +#include "mozilla/Atomics.h" +#include "mozilla/Attributes.h" +#include "mozilla/HashTable.h" +#include "mozilla/Mutex.h" +#include "mozilla/fuzzing/Nyx.h" + +#include "nsIRunnable.h" +#include "nsThreadUtils.h" + +#include "chrome/common/ipc_message.h" +#include "mojo/core/ports/name.h" +#include "mojo/core/ports/event.h" + +#include "IPCMessageStart.h" + +#include <unordered_map> +#include <unordered_set> +#include <string> +#include <utility> +#include <vector> + +#define MOZ_FUZZING_IPC_DROP_PEER(aReason) \ + mozilla::fuzzing::IPCFuzzController::instance().OnDropPeer( \ + aReason, __FILE__, __LINE__); + +#define MOZ_FUZZING_IPC_MT_CTOR() \ + mozilla::fuzzing::IPCFuzzController::instance().OnMessageTaskStart(); + +#define MOZ_FUZZING_IPC_MT_STOP() \ + mozilla::fuzzing::IPCFuzzController::instance().OnMessageTaskStop(); + +#define MOZ_FUZZING_IPC_PRE_FUZZ_MT_RUN() \ + mozilla::fuzzing::IPCFuzzController::instance().OnPreFuzzMessageTaskRun(); + +#define MOZ_FUZZING_IPC_PRE_FUZZ_MT_STOP() \ + mozilla::fuzzing::IPCFuzzController::instance().OnPreFuzzMessageTaskStop(); + +namespace mozilla { + +namespace ipc { +// We can't include ProtocolUtils.h here +class IProtocol; +typedef IPCMessageStart ProtocolId; + +class NodeChannel; +} // namespace ipc + +namespace fuzzing { + +class IPCFuzzController { + typedef std::pair<int32_t, uint64_t> SeqNoPair; + + typedef std::pair<int32_t, mozilla::ipc::ProtocolId> ActorIdPair; + + class IPCFuzzLoop final : public Runnable { + friend class IPCFuzzController; + + public: + NS_DECL_NSIRUNNABLE + + IPCFuzzLoop(); + + private: + ~IPCFuzzLoop() = default; + }; + + public: + static IPCFuzzController& instance(); + + void InitializeIPCTypes(); + bool GetRandomIPCMessageType(mozilla::ipc::ProtocolId pId, + uint16_t typeOffset, uint32_t* type); + + bool ObserveIPCMessage(mozilla::ipc::NodeChannel* channel, + IPC::Message& aMessage); + bool MakeTargetDecision(uint8_t portIndex, uint8_t portInstanceIndex, + uint8_t actorIndex, uint16_t typeOffset, + mojo::core::ports::PortName* name, int32_t* seqno, + uint64_t* fseqno, int32_t* actorId, uint32_t* type, + bool* is_cons, bool update = true); + + void OnActorConnected(mozilla::ipc::IProtocol* protocol); + void OnActorDestroyed(mozilla::ipc::IProtocol* protocol); + void OnDropPeer(const char* reason, const char* file, int line); + void OnMessageTaskStart(); + void OnMessageTaskStop(); + void OnPreFuzzMessageTaskRun(); + void OnPreFuzzMessageTaskStop(); + void OnChildReady() { childReady = true; } + void OnRunnableDone() { runnableDone = true; } + + uint32_t getPreFuzzMessageTaskCount() { return messageTaskCount; }; + uint32_t getMessageStartCount() { return messageStartCount; }; + uint32_t getMessageStopCount() { return messageStopCount; }; + + void StartFuzzing(mozilla::ipc::NodeChannel* channel, IPC::Message& aMessage); + + void SynchronizeOnMessageExecution(uint32_t expected_messages); + void AddToplevelActor(mojo::core::ports::PortName name, + mozilla::ipc::ProtocolId protocolId); + + private: + // This is a mapping from port name to a pair of last seen sequence numbers. + std::unordered_map<mojo::core::ports::PortName, SeqNoPair> portSeqNos; + + // This is a mapping from port name to node name. + std::unordered_map<mojo::core::ports::PortName, mojo::core::ports::NodeName> + portNodeName; + + // This maps each ProtocolId (IPCMessageStart) to the number of valid + // messages for that particular type. + std::unordered_map<mozilla::ipc::ProtocolId, uint32_t> validMsgTypes; + + // This is a mapping from port name to pairs of actor Id and ProtocolId. + std::unordered_map<mojo::core::ports::PortName, std::vector<ActorIdPair>> + actorIds; + + // If set, `lastActorPortName` is valid and fuzzing is pinned to this port. + Atomic<bool> useLastPortName; + + // Last port where a new actor appeared. Only valid with `useLastPortName`. + mojo::core::ports::PortName lastActorPortName; + + // Counter to indicate how long fuzzing should stay pinned to the last + // actor that appeared on `lastActorPortName`. + Atomic<uint32_t> useLastActor; + + // This is the deterministic ordering of toplevel actors for fuzzing. + // In this matrix, each row (toplevel index) corresponds to one toplevel + // actor *type* while each entry in that row is an instance of that type, + // since some actors usually have multiple instances alive while others + // don't. For the exact ordering, please check the constructor for this + // class. + std::vector<std::vector<mojo::core::ports::PortName>> portNames; + std::unordered_map<std::string, uint8_t> portNameToIndex; + + // This is a set of all types that are constructors. + std::unordered_set<uint32_t> constructorTypes; + + // This is the name of the target node. We select one Node based on a + // particular toplevel actor and then use this to pull in additional + // toplevel actors that are on the same node (i.e. belong to the same + // process pair). + mojo::core::ports::NodeName targetNodeName; + bool haveTargetNodeName = false; + + // This indicates that we have started the fuzzing thread and fuzzing will + // begin shortly. + bool fuzzingStartPending = false; + + // This is used as a signal from other threads that runnables we dispatched + // are completed. Right now we use this only when dispatching to the main + // thread to await the completion of all pending events. + Atomic<bool> runnableDone; + + // This is used to signal that the other process we are talking to is ready + // to start fuzzing. In the case of Parent <-> Child, a special IPC message + // is used to signal this. We might not be able to start fuzzing immediately + // hough if not all toplevel actors have been created yet. + Atomic<bool> childReady; + + // Current amount of pending message tasks. + Atomic<uint32_t> messageStartCount; + Atomic<uint32_t> messageStopCount; + + Atomic<uint32_t> messageTaskCount; + + Vector<char, 256, InfallibleAllocPolicy> sampleHeader; + + mozilla::ipc::NodeChannel* nodeChannel = nullptr; + + // This class is used both on the I/O and background threads as well as + // its own fuzzing thread. Those methods that alter non-threadsafe data + // structures need to aquire this mutex first. + Mutex mMutex; // MOZ_UNANNOTATED; + + IPCFuzzController(); + NYX_DISALLOW_COPY_AND_ASSIGN(IPCFuzzController); +}; + +} // namespace fuzzing +} // namespace mozilla + +#endif diff --git a/tools/fuzzing/ipc/moz.build b/tools/fuzzing/ipc/moz.build new file mode 100644 index 0000000000..001c67ed37 --- /dev/null +++ b/tools/fuzzing/ipc/moz.build @@ -0,0 +1,27 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Library("fuzzer-ipc-protocol") + +LOCAL_INCLUDES += [ + "/dom/base", + "/dom/ipc", +] + +SOURCES += [ + "IPCFuzzController.cpp", +] + +EXPORTS.mozilla.fuzzing += [ + "IPCFuzzController.h", +] + +include("/ipc/chromium/chromium-config.mozbuild") + +FINAL_LIBRARY = "xul" + +# Add libFuzzer configuration directives +include("/tools/fuzzing/libfuzzer-config.mozbuild") diff --git a/tools/fuzzing/libfuzzer-config.mozbuild b/tools/fuzzing/libfuzzer-config.mozbuild new file mode 100644 index 0000000000..91998e0050 --- /dev/null +++ b/tools/fuzzing/libfuzzer-config.mozbuild @@ -0,0 +1,13 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +include('libfuzzer-flags.mozbuild') + +if CONFIG['FUZZING']: + if CONFIG['LIBFUZZER']: + # Add trace-pc coverage for libfuzzer + CFLAGS += libfuzzer_flags + CXXFLAGS += libfuzzer_flags diff --git a/tools/fuzzing/libfuzzer-flags.mozbuild b/tools/fuzzing/libfuzzer-flags.mozbuild new file mode 100644 index 0000000000..97f9c6cf45 --- /dev/null +++ b/tools/fuzzing/libfuzzer-flags.mozbuild @@ -0,0 +1,11 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +libfuzzer_flags = [] + +if CONFIG['LIBFUZZER_FLAGS']: + libfuzzer_flags += CONFIG['LIBFUZZER_FLAGS'] + diff --git a/tools/fuzzing/libfuzzer/FuzzerBuiltins.h b/tools/fuzzing/libfuzzer/FuzzerBuiltins.h new file mode 100644 index 0000000000..4c0ada8266 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerBuiltins.h @@ -0,0 +1,35 @@ +//===- FuzzerBuiltins.h - Internal header for builtins ----------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Wrapper functions and marcos around builtin functions. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_BUILTINS_H +#define LLVM_FUZZER_BUILTINS_H + +#include "FuzzerPlatform.h" + +#if !LIBFUZZER_MSVC +#include <cstdint> + +#define GET_CALLER_PC() __builtin_return_address(0) + +namespace fuzzer { + +inline uint8_t Bswap(uint8_t x) { return x; } +inline uint16_t Bswap(uint16_t x) { return __builtin_bswap16(x); } +inline uint32_t Bswap(uint32_t x) { return __builtin_bswap32(x); } +inline uint64_t Bswap(uint64_t x) { return __builtin_bswap64(x); } + +inline uint32_t Clzll(unsigned long long X) { return __builtin_clzll(X); } +inline uint32_t Clz(unsigned long long X) { return __builtin_clz(X); } +inline int Popcountll(unsigned long long X) { return __builtin_popcountll(X); } + +} // namespace fuzzer + +#endif // !LIBFUZZER_MSVC +#endif // LLVM_FUZZER_BUILTINS_H diff --git a/tools/fuzzing/libfuzzer/FuzzerBuiltinsMsvc.h b/tools/fuzzing/libfuzzer/FuzzerBuiltinsMsvc.h new file mode 100644 index 0000000000..c5bec9787d --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerBuiltinsMsvc.h @@ -0,0 +1,72 @@ +//===- FuzzerBuiltinsMSVC.h - Internal header for builtins ------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Wrapper functions and marcos that use intrinsics instead of builtin functions +// which cannot be compiled by MSVC. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_BUILTINS_MSVC_H +#define LLVM_FUZZER_BUILTINS_MSVC_H + +#include "FuzzerPlatform.h" + +#if LIBFUZZER_MSVC +#include <intrin.h> +#include <cstdint> +#include <cstdlib> + +// __builtin_return_address() cannot be compiled with MSVC. Use the equivalent +// from <intrin.h> +#define GET_CALLER_PC() _ReturnAddress() + +namespace fuzzer { + +inline uint8_t Bswap(uint8_t x) { return x; } +// Use alternatives to __builtin functions from <stdlib.h> and <intrin.h> on +// Windows since the builtins are not supported by MSVC. +inline uint16_t Bswap(uint16_t x) { return _byteswap_ushort(x); } +inline uint32_t Bswap(uint32_t x) { return _byteswap_ulong(x); } +inline uint64_t Bswap(uint64_t x) { return _byteswap_uint64(x); } + +// The functions below were mostly copied from +// compiler-rt/lib/builtins/int_lib.h which defines the __builtin functions used +// outside of Windows. +inline uint32_t Clzll(uint64_t X) { + unsigned long LeadZeroIdx = 0; + +#if !defined(_M_ARM) && !defined(_M_X64) + // Scan the high 32 bits. + if (_BitScanReverse(&LeadZeroIdx, static_cast<unsigned long>(X >> 32))) + return static_cast<int>(63 - (LeadZeroIdx + 32)); // Create a bit offset from the MSB. + // Scan the low 32 bits. + if (_BitScanReverse(&LeadZeroIdx, static_cast<unsigned long>(X))) + return static_cast<int>(63 - LeadZeroIdx); + +#else + if (_BitScanReverse64(&LeadZeroIdx, X)) return 63 - LeadZeroIdx; +#endif + return 64; +} + +inline uint32_t Clz(uint32_t X) { + unsigned long LeadZeroIdx = 0; + if (_BitScanReverse(&LeadZeroIdx, X)) return 31 - LeadZeroIdx; + return 32; +} + +inline int Popcountll(unsigned long long X) { +#if !defined(_M_ARM) && !defined(_M_X64) + return __popcnt(X) + __popcnt(X >> 32); +#else + return __popcnt64(X); +#endif +} + +} // namespace fuzzer + +#endif // LIBFUZER_MSVC +#endif // LLVM_FUZZER_BUILTINS_MSVC_H diff --git a/tools/fuzzing/libfuzzer/FuzzerCommand.h b/tools/fuzzing/libfuzzer/FuzzerCommand.h new file mode 100644 index 0000000000..87308864af --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerCommand.h @@ -0,0 +1,178 @@ +//===- FuzzerCommand.h - Interface representing a process -------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// FuzzerCommand represents a command to run in a subprocess. It allows callers +// to manage command line arguments and output and error streams. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_COMMAND_H +#define LLVM_FUZZER_COMMAND_H + +#include "FuzzerDefs.h" +#include "FuzzerIO.h" + +#include <algorithm> +#include <sstream> +#include <string> +#include <vector> + +namespace fuzzer { + +class Command final { +public: + // This command line flag is used to indicate that the remaining command line + // is immutable, meaning this flag effectively marks the end of the mutable + // argument list. + static inline const char *ignoreRemainingArgs() { + return "-ignore_remaining_args=1"; + } + + Command() : CombinedOutAndErr(false) {} + + explicit Command(const Vector<std::string> &ArgsToAdd) + : Args(ArgsToAdd), CombinedOutAndErr(false) {} + + explicit Command(const Command &Other) + : Args(Other.Args), CombinedOutAndErr(Other.CombinedOutAndErr), + OutputFile(Other.OutputFile) {} + + Command &operator=(const Command &Other) { + Args = Other.Args; + CombinedOutAndErr = Other.CombinedOutAndErr; + OutputFile = Other.OutputFile; + return *this; + } + + ~Command() {} + + // Returns true if the given Arg is present in Args. Only checks up to + // "-ignore_remaining_args=1". + bool hasArgument(const std::string &Arg) const { + auto i = endMutableArgs(); + return std::find(Args.begin(), i, Arg) != i; + } + + // Gets all of the current command line arguments, **including** those after + // "-ignore-remaining-args=1". + const Vector<std::string> &getArguments() const { return Args; } + + // Adds the given argument before "-ignore_remaining_args=1", or at the end + // if that flag isn't present. + void addArgument(const std::string &Arg) { + Args.insert(endMutableArgs(), Arg); + } + + // Adds all given arguments before "-ignore_remaining_args=1", or at the end + // if that flag isn't present. + void addArguments(const Vector<std::string> &ArgsToAdd) { + Args.insert(endMutableArgs(), ArgsToAdd.begin(), ArgsToAdd.end()); + } + + // Removes the given argument from the command argument list. Ignores any + // occurrences after "-ignore_remaining_args=1", if present. + void removeArgument(const std::string &Arg) { + auto i = endMutableArgs(); + Args.erase(std::remove(Args.begin(), i, Arg), i); + } + + // Like hasArgument, but checks for "-[Flag]=...". + bool hasFlag(const std::string &Flag) const { + std::string Arg("-" + Flag + "="); + auto IsMatch = [&](const std::string &Other) { + return Arg.compare(0, std::string::npos, Other, 0, Arg.length()) == 0; + }; + return std::any_of(Args.begin(), endMutableArgs(), IsMatch); + } + + // Returns the value of the first instance of a given flag, or an empty string + // if the flag isn't present. Ignores any occurrences after + // "-ignore_remaining_args=1", if present. + std::string getFlagValue(const std::string &Flag) const { + std::string Arg("-" + Flag + "="); + auto IsMatch = [&](const std::string &Other) { + return Arg.compare(0, std::string::npos, Other, 0, Arg.length()) == 0; + }; + auto i = endMutableArgs(); + auto j = std::find_if(Args.begin(), i, IsMatch); + std::string result; + if (j != i) { + result = j->substr(Arg.length()); + } + return result; + } + + // Like AddArgument, but adds "-[Flag]=[Value]". + void addFlag(const std::string &Flag, const std::string &Value) { + addArgument("-" + Flag + "=" + Value); + } + + // Like RemoveArgument, but removes "-[Flag]=...". + void removeFlag(const std::string &Flag) { + std::string Arg("-" + Flag + "="); + auto IsMatch = [&](const std::string &Other) { + return Arg.compare(0, std::string::npos, Other, 0, Arg.length()) == 0; + }; + auto i = endMutableArgs(); + Args.erase(std::remove_if(Args.begin(), i, IsMatch), i); + } + + // Returns whether the command's stdout is being written to an output file. + bool hasOutputFile() const { return !OutputFile.empty(); } + + // Returns the currently set output file. + const std::string &getOutputFile() const { return OutputFile; } + + // Configures the command to redirect its output to the name file. + void setOutputFile(const std::string &FileName) { OutputFile = FileName; } + + // Returns whether the command's stderr is redirected to stdout. + bool isOutAndErrCombined() const { return CombinedOutAndErr; } + + // Sets whether to redirect the command's stderr to its stdout. + void combineOutAndErr(bool combine = true) { CombinedOutAndErr = combine; } + + // Returns a string representation of the command. On many systems this will + // be the equivalent command line. + std::string toString() const { + std::stringstream SS; + for (auto arg : getArguments()) + SS << arg << " "; + if (hasOutputFile()) + SS << ">" << getOutputFile() << " "; + if (isOutAndErrCombined()) + SS << "2>&1 "; + std::string result = SS.str(); + if (!result.empty()) + result = result.substr(0, result.length() - 1); + return result; + } + +private: + Command(Command &&Other) = delete; + Command &operator=(Command &&Other) = delete; + + Vector<std::string>::iterator endMutableArgs() { + return std::find(Args.begin(), Args.end(), ignoreRemainingArgs()); + } + + Vector<std::string>::const_iterator endMutableArgs() const { + return std::find(Args.begin(), Args.end(), ignoreRemainingArgs()); + } + + // The command arguments. Args[0] is the command name. + Vector<std::string> Args; + + // True indicates stderr is redirected to stdout. + bool CombinedOutAndErr; + + // If not empty, stdout is redirected to the named file. + std::string OutputFile; +}; + +} // namespace fuzzer + +#endif // LLVM_FUZZER_COMMAND_H diff --git a/tools/fuzzing/libfuzzer/FuzzerCorpus.h b/tools/fuzzing/libfuzzer/FuzzerCorpus.h new file mode 100644 index 0000000000..54d1e09ec6 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerCorpus.h @@ -0,0 +1,533 @@ +//===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// fuzzer::InputCorpus +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_CORPUS +#define LLVM_FUZZER_CORPUS + +#include "FuzzerDataFlowTrace.h" +#include "FuzzerDefs.h" +#include "FuzzerIO.h" +#include "FuzzerRandom.h" +#include "FuzzerSHA1.h" +#include "FuzzerTracePC.h" +#include <algorithm> +#include <numeric> +#include <random> +#include <unordered_set> + +namespace fuzzer { + +struct InputInfo { + Unit U; // The actual input data. + uint8_t Sha1[kSHA1NumBytes]; // Checksum. + // Number of features that this input has and no smaller input has. + size_t NumFeatures = 0; + size_t Tmp = 0; // Used by ValidateFeatureSet. + // Stats. + size_t NumExecutedMutations = 0; + size_t NumSuccessfullMutations = 0; + bool MayDeleteFile = false; + bool Reduced = false; + bool HasFocusFunction = false; + Vector<uint32_t> UniqFeatureSet; + Vector<uint8_t> DataFlowTraceForFocusFunction; + // Power schedule. + bool NeedsEnergyUpdate = false; + double Energy = 0.0; + size_t SumIncidence = 0; + Vector<std::pair<uint32_t, uint16_t>> FeatureFreqs; + + // Delete feature Idx and its frequency from FeatureFreqs. + bool DeleteFeatureFreq(uint32_t Idx) { + if (FeatureFreqs.empty()) + return false; + + // Binary search over local feature frequencies sorted by index. + auto Lower = std::lower_bound(FeatureFreqs.begin(), FeatureFreqs.end(), + std::pair<uint32_t, uint16_t>(Idx, 0)); + + if (Lower != FeatureFreqs.end() && Lower->first == Idx) { + FeatureFreqs.erase(Lower); + return true; + } + return false; + } + + // Assign more energy to a high-entropy seed, i.e., that reveals more + // information about the globally rare features in the neighborhood + // of the seed. Since we do not know the entropy of a seed that has + // never been executed we assign fresh seeds maximum entropy and + // let II->Energy approach the true entropy from above. + void UpdateEnergy(size_t GlobalNumberOfFeatures) { + Energy = 0.0; + SumIncidence = 0; + + // Apply add-one smoothing to locally discovered features. + for (auto F : FeatureFreqs) { + size_t LocalIncidence = F.second + 1; + Energy -= LocalIncidence * logl(LocalIncidence); + SumIncidence += LocalIncidence; + } + + // Apply add-one smoothing to locally undiscovered features. + // PreciseEnergy -= 0; // since logl(1.0) == 0) + SumIncidence += (GlobalNumberOfFeatures - FeatureFreqs.size()); + + // Add a single locally abundant feature apply add-one smoothing. + size_t AbdIncidence = NumExecutedMutations + 1; + Energy -= AbdIncidence * logl(AbdIncidence); + SumIncidence += AbdIncidence; + + // Normalize. + if (SumIncidence != 0) + Energy = (Energy / SumIncidence) + logl(SumIncidence); + } + + // Increment the frequency of the feature Idx. + void UpdateFeatureFrequency(uint32_t Idx) { + NeedsEnergyUpdate = true; + + // The local feature frequencies is an ordered vector of pairs. + // If there are no local feature frequencies, push_back preserves order. + // Set the feature frequency for feature Idx32 to 1. + if (FeatureFreqs.empty()) { + FeatureFreqs.push_back(std::pair<uint32_t, uint16_t>(Idx, 1)); + return; + } + + // Binary search over local feature frequencies sorted by index. + auto Lower = std::lower_bound(FeatureFreqs.begin(), FeatureFreqs.end(), + std::pair<uint32_t, uint16_t>(Idx, 0)); + + // If feature Idx32 already exists, increment its frequency. + // Otherwise, insert a new pair right after the next lower index. + if (Lower != FeatureFreqs.end() && Lower->first == Idx) { + Lower->second++; + } else { + FeatureFreqs.insert(Lower, std::pair<uint32_t, uint16_t>(Idx, 1)); + } + } +}; + +struct EntropicOptions { + bool Enabled; + size_t NumberOfRarestFeatures; + size_t FeatureFrequencyThreshold; +}; + +class InputCorpus { + static const uint32_t kFeatureSetSize = 1 << 21; + static const uint8_t kMaxMutationFactor = 20; + static const size_t kSparseEnergyUpdates = 100; + + size_t NumExecutedMutations = 0; + + EntropicOptions Entropic; + +public: + InputCorpus(const std::string &OutputCorpus, EntropicOptions Entropic) + : Entropic(Entropic), OutputCorpus(OutputCorpus) { + memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); + memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); + } + ~InputCorpus() { + for (auto II : Inputs) + delete II; + } + size_t size() const { return Inputs.size(); } + size_t SizeInBytes() const { + size_t Res = 0; + for (auto II : Inputs) + Res += II->U.size(); + return Res; + } + size_t NumActiveUnits() const { + size_t Res = 0; + for (auto II : Inputs) + Res += !II->U.empty(); + return Res; + } + size_t MaxInputSize() const { + size_t Res = 0; + for (auto II : Inputs) + Res = std::max(Res, II->U.size()); + return Res; + } + void IncrementNumExecutedMutations() { NumExecutedMutations++; } + + size_t NumInputsThatTouchFocusFunction() { + return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) { + return II->HasFocusFunction; + }); + } + + size_t NumInputsWithDataFlowTrace() { + return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) { + return !II->DataFlowTraceForFocusFunction.empty(); + }); + } + + bool empty() const { return Inputs.empty(); } + const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } + InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, + bool HasFocusFunction, + const Vector<uint32_t> &FeatureSet, + const DataFlowTrace &DFT, const InputInfo *BaseII) { + assert(!U.empty()); + if (FeatureDebug) + Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); + Inputs.push_back(new InputInfo()); + InputInfo &II = *Inputs.back(); + II.U = U; + II.NumFeatures = NumFeatures; + II.MayDeleteFile = MayDeleteFile; + II.UniqFeatureSet = FeatureSet; + II.HasFocusFunction = HasFocusFunction; + // Assign maximal energy to the new seed. + II.Energy = RareFeatures.empty() ? 1.0 : log(RareFeatures.size()); + II.SumIncidence = RareFeatures.size(); + II.NeedsEnergyUpdate = false; + std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end()); + ComputeSHA1(U.data(), U.size(), II.Sha1); + auto Sha1Str = Sha1ToString(II.Sha1); + Hashes.insert(Sha1Str); + if (HasFocusFunction) + if (auto V = DFT.Get(Sha1Str)) + II.DataFlowTraceForFocusFunction = *V; + // This is a gross heuristic. + // Ideally, when we add an element to a corpus we need to know its DFT. + // But if we don't, we'll use the DFT of its base input. + if (II.DataFlowTraceForFocusFunction.empty() && BaseII) + II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction; + DistributionNeedsUpdate = true; + PrintCorpus(); + // ValidateFeatureSet(); + return &II; + } + + // Debug-only + void PrintUnit(const Unit &U) { + if (!FeatureDebug) return; + for (uint8_t C : U) { + if (C != 'F' && C != 'U' && C != 'Z') + C = '.'; + Printf("%c", C); + } + } + + // Debug-only + void PrintFeatureSet(const Vector<uint32_t> &FeatureSet) { + if (!FeatureDebug) return; + Printf("{"); + for (uint32_t Feature: FeatureSet) + Printf("%u,", Feature); + Printf("}"); + } + + // Debug-only + void PrintCorpus() { + if (!FeatureDebug) return; + Printf("======= CORPUS:\n"); + int i = 0; + for (auto II : Inputs) { + if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) { + Printf("[%2d] ", i); + Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size()); + PrintUnit(II->U); + Printf(" "); + PrintFeatureSet(II->UniqFeatureSet); + Printf("\n"); + } + i++; + } + } + + void Replace(InputInfo *II, const Unit &U) { + assert(II->U.size() > U.size()); + Hashes.erase(Sha1ToString(II->Sha1)); + DeleteFile(*II); + ComputeSHA1(U.data(), U.size(), II->Sha1); + Hashes.insert(Sha1ToString(II->Sha1)); + II->U = U; + II->Reduced = true; + DistributionNeedsUpdate = true; + } + + bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } + bool HasUnit(const std::string &H) { return Hashes.count(H); } + InputInfo &ChooseUnitToMutate(Random &Rand) { + InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)]; + assert(!II.U.empty()); + return II; + } + + // Returns an index of random unit from the corpus to mutate. + size_t ChooseUnitIdxToMutate(Random &Rand) { + UpdateCorpusDistribution(Rand); + size_t Idx = static_cast<size_t>(CorpusDistribution(Rand)); + assert(Idx < Inputs.size()); + return Idx; + } + + void PrintStats() { + for (size_t i = 0; i < Inputs.size(); i++) { + const auto &II = *Inputs[i]; + Printf(" [% 3zd %s] sz: % 5zd runs: % 5zd succ: % 5zd focus: %d\n", i, + Sha1ToString(II.Sha1).c_str(), II.U.size(), + II.NumExecutedMutations, II.NumSuccessfullMutations, II.HasFocusFunction); + } + } + + void PrintFeatureSet() { + for (size_t i = 0; i < kFeatureSetSize; i++) { + if(size_t Sz = GetFeature(i)) + Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz); + } + Printf("\n\t"); + for (size_t i = 0; i < Inputs.size(); i++) + if (size_t N = Inputs[i]->NumFeatures) + Printf(" %zd=>%zd ", i, N); + Printf("\n"); + } + + void DeleteFile(const InputInfo &II) { + if (!OutputCorpus.empty() && II.MayDeleteFile) + RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1))); + } + + void DeleteInput(size_t Idx) { + InputInfo &II = *Inputs[Idx]; + DeleteFile(II); + Unit().swap(II.U); + II.Energy = 0.0; + II.NeedsEnergyUpdate = false; + DistributionNeedsUpdate = true; + if (FeatureDebug) + Printf("EVICTED %zd\n", Idx); + } + + void AddRareFeature(uint32_t Idx) { + // Maintain *at least* TopXRarestFeatures many rare features + // and all features with a frequency below ConsideredRare. + // Remove all other features. + while (RareFeatures.size() > Entropic.NumberOfRarestFeatures && + FreqOfMostAbundantRareFeature > Entropic.FeatureFrequencyThreshold) { + + // Find most and second most abbundant feature. + uint32_t MostAbundantRareFeatureIndices[2] = {RareFeatures[0], + RareFeatures[0]}; + size_t Delete = 0; + for (size_t i = 0; i < RareFeatures.size(); i++) { + uint32_t Idx2 = RareFeatures[i]; + if (GlobalFeatureFreqs[Idx2] >= + GlobalFeatureFreqs[MostAbundantRareFeatureIndices[0]]) { + MostAbundantRareFeatureIndices[1] = MostAbundantRareFeatureIndices[0]; + MostAbundantRareFeatureIndices[0] = Idx2; + Delete = i; + } + } + + // Remove most abundant rare feature. + RareFeatures[Delete] = RareFeatures.back(); + RareFeatures.pop_back(); + + for (auto II : Inputs) { + if (II->DeleteFeatureFreq(MostAbundantRareFeatureIndices[0])) + II->NeedsEnergyUpdate = true; + } + + // Set 2nd most abundant as the new most abundant feature count. + FreqOfMostAbundantRareFeature = + GlobalFeatureFreqs[MostAbundantRareFeatureIndices[1]]; + } + + // Add rare feature, handle collisions, and update energy. + RareFeatures.push_back(Idx); + GlobalFeatureFreqs[Idx] = 0; + for (auto II : Inputs) { + II->DeleteFeatureFreq(Idx); + + // Apply add-one smoothing to this locally undiscovered feature. + // Zero energy seeds will never be fuzzed and remain zero energy. + if (II->Energy > 0.0) { + II->SumIncidence += 1; + II->Energy += logl(II->SumIncidence) / II->SumIncidence; + } + } + + DistributionNeedsUpdate = true; + } + + bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { + assert(NewSize); + Idx = Idx % kFeatureSetSize; + uint32_t OldSize = GetFeature(Idx); + if (OldSize == 0 || (Shrink && OldSize > NewSize)) { + if (OldSize > 0) { + size_t OldIdx = SmallestElementPerFeature[Idx]; + InputInfo &II = *Inputs[OldIdx]; + assert(II.NumFeatures > 0); + II.NumFeatures--; + if (II.NumFeatures == 0) + DeleteInput(OldIdx); + } else { + NumAddedFeatures++; + if (Entropic.Enabled) + AddRareFeature((uint32_t)Idx); + } + NumUpdatedFeatures++; + if (FeatureDebug) + Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize); + SmallestElementPerFeature[Idx] = Inputs.size(); + InputSizesPerFeature[Idx] = NewSize; + return true; + } + return false; + } + + // Increment frequency of feature Idx globally and locally. + void UpdateFeatureFrequency(InputInfo *II, size_t Idx) { + uint32_t Idx32 = Idx % kFeatureSetSize; + + // Saturated increment. + if (GlobalFeatureFreqs[Idx32] == 0xFFFF) + return; + uint16_t Freq = GlobalFeatureFreqs[Idx32]++; + + // Skip if abundant. + if (Freq > FreqOfMostAbundantRareFeature || + std::find(RareFeatures.begin(), RareFeatures.end(), Idx32) == + RareFeatures.end()) + return; + + // Update global frequencies. + if (Freq == FreqOfMostAbundantRareFeature) + FreqOfMostAbundantRareFeature++; + + // Update local frequencies. + if (II) + II->UpdateFeatureFrequency(Idx32); + } + + size_t NumFeatures() const { return NumAddedFeatures; } + size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } + +private: + + static const bool FeatureDebug = false; + + size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; } + + void ValidateFeatureSet() { + if (FeatureDebug) + PrintFeatureSet(); + for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) + if (GetFeature(Idx)) + Inputs[SmallestElementPerFeature[Idx]]->Tmp++; + for (auto II: Inputs) { + if (II->Tmp != II->NumFeatures) + Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures); + assert(II->Tmp == II->NumFeatures); + II->Tmp = 0; + } + } + + // Updates the probability distribution for the units in the corpus. + // Must be called whenever the corpus or unit weights are changed. + // + // Hypothesis: inputs that maximize information about globally rare features + // are interesting. + void UpdateCorpusDistribution(Random &Rand) { + // Skip update if no seeds or rare features were added/deleted. + // Sparse updates for local change of feature frequencies, + // i.e., randomly do not skip. + if (!DistributionNeedsUpdate && + (!Entropic.Enabled || Rand(kSparseEnergyUpdates))) + return; + + DistributionNeedsUpdate = false; + + size_t N = Inputs.size(); + assert(N); + Intervals.resize(N + 1); + Weights.resize(N); + std::iota(Intervals.begin(), Intervals.end(), 0); + + bool VanillaSchedule = true; + if (Entropic.Enabled) { + for (auto II : Inputs) { + if (II->NeedsEnergyUpdate && II->Energy != 0.0) { + II->NeedsEnergyUpdate = false; + II->UpdateEnergy(RareFeatures.size()); + } + } + + for (size_t i = 0; i < N; i++) { + + if (Inputs[i]->NumFeatures == 0) { + // If the seed doesn't represent any features, assign zero energy. + Weights[i] = 0.; + } else if (Inputs[i]->NumExecutedMutations / kMaxMutationFactor > + NumExecutedMutations / Inputs.size()) { + // If the seed was fuzzed a lot more than average, assign zero energy. + Weights[i] = 0.; + } else { + // Otherwise, simply assign the computed energy. + Weights[i] = Inputs[i]->Energy; + } + + // If energy for all seeds is zero, fall back to vanilla schedule. + if (Weights[i] > 0.0) + VanillaSchedule = false; + } + } + + if (VanillaSchedule) { + for (size_t i = 0; i < N; i++) + Weights[i] = Inputs[i]->NumFeatures + ? (i + 1) * (Inputs[i]->HasFocusFunction ? 1000 : 1) + : 0.; + } + + if (FeatureDebug) { + for (size_t i = 0; i < N; i++) + Printf("%zd ", Inputs[i]->NumFeatures); + Printf("SCORE\n"); + for (size_t i = 0; i < N; i++) + Printf("%f ", Weights[i]); + Printf("Weights\n"); + } + CorpusDistribution = std::piecewise_constant_distribution<double>( + Intervals.begin(), Intervals.end(), Weights.begin()); + } + std::piecewise_constant_distribution<double> CorpusDistribution; + + Vector<double> Intervals; + Vector<double> Weights; + + std::unordered_set<std::string> Hashes; + Vector<InputInfo*> Inputs; + + size_t NumAddedFeatures = 0; + size_t NumUpdatedFeatures = 0; + uint32_t InputSizesPerFeature[kFeatureSetSize]; + uint32_t SmallestElementPerFeature[kFeatureSetSize]; + + bool DistributionNeedsUpdate = true; + uint16_t FreqOfMostAbundantRareFeature = 0; + uint16_t GlobalFeatureFreqs[kFeatureSetSize] = {}; + Vector<uint32_t> RareFeatures; + + std::string OutputCorpus; +}; + +} // namespace fuzzer + +#endif // LLVM_FUZZER_CORPUS diff --git a/tools/fuzzing/libfuzzer/FuzzerCrossOver.cpp b/tools/fuzzing/libfuzzer/FuzzerCrossOver.cpp new file mode 100644 index 0000000000..83d9f8d47c --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerCrossOver.cpp @@ -0,0 +1,51 @@ +//===- FuzzerCrossOver.cpp - Cross over two test inputs -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Cross over test inputs. +//===----------------------------------------------------------------------===// + +#include "FuzzerDefs.h" +#include "FuzzerMutate.h" +#include "FuzzerRandom.h" +#include <cstring> + +namespace fuzzer { + +// Cross Data1 and Data2, store the result (up to MaxOutSize bytes) in Out. +size_t MutationDispatcher::CrossOver(const uint8_t *Data1, size_t Size1, + const uint8_t *Data2, size_t Size2, + uint8_t *Out, size_t MaxOutSize) { + assert(Size1 || Size2); + MaxOutSize = Rand(MaxOutSize) + 1; + size_t OutPos = 0; + size_t Pos1 = 0; + size_t Pos2 = 0; + size_t *InPos = &Pos1; + size_t InSize = Size1; + const uint8_t *Data = Data1; + bool CurrentlyUsingFirstData = true; + while (OutPos < MaxOutSize && (Pos1 < Size1 || Pos2 < Size2)) { + // Merge a part of Data into Out. + size_t OutSizeLeft = MaxOutSize - OutPos; + if (*InPos < InSize) { + size_t InSizeLeft = InSize - *InPos; + size_t MaxExtraSize = std::min(OutSizeLeft, InSizeLeft); + size_t ExtraSize = Rand(MaxExtraSize) + 1; + memcpy(Out + OutPos, Data + *InPos, ExtraSize); + OutPos += ExtraSize; + (*InPos) += ExtraSize; + } + // Use the other input data on the next iteration. + InPos = CurrentlyUsingFirstData ? &Pos2 : &Pos1; + InSize = CurrentlyUsingFirstData ? Size2 : Size1; + Data = CurrentlyUsingFirstData ? Data2 : Data1; + CurrentlyUsingFirstData = !CurrentlyUsingFirstData; + } + return OutPos; +} + +} // namespace fuzzer diff --git a/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.cpp b/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.cpp new file mode 100644 index 0000000000..06ea287a3c --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.cpp @@ -0,0 +1,295 @@ +//===- FuzzerDataFlowTrace.cpp - DataFlowTrace ---*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// fuzzer::DataFlowTrace +//===----------------------------------------------------------------------===// + +#include "FuzzerDataFlowTrace.h" + +#include "FuzzerCommand.h" +#include "FuzzerIO.h" +#include "FuzzerRandom.h" +#include "FuzzerSHA1.h" +#include "FuzzerUtil.h" + +#include <cstdlib> +#include <fstream> +#include <numeric> +#include <queue> +#include <sstream> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +namespace fuzzer { +static const char *kFunctionsTxt = "functions.txt"; + +bool BlockCoverage::AppendCoverage(const std::string &S) { + std::stringstream SS(S); + return AppendCoverage(SS); +} + +// Coverage lines have this form: +// CN X Y Z T +// where N is the number of the function, T is the total number of instrumented +// BBs, and X,Y,Z, if present, are the indecies of covered BB. +// BB #0, which is the entry block, is not explicitly listed. +bool BlockCoverage::AppendCoverage(std::istream &IN) { + std::string L; + while (std::getline(IN, L, '\n')) { + if (L.empty()) + continue; + std::stringstream SS(L.c_str() + 1); + size_t FunctionId = 0; + SS >> FunctionId; + if (L[0] == 'F') { + FunctionsWithDFT.insert(FunctionId); + continue; + } + if (L[0] != 'C') continue; + Vector<uint32_t> CoveredBlocks; + while (true) { + uint32_t BB = 0; + SS >> BB; + if (!SS) break; + CoveredBlocks.push_back(BB); + } + if (CoveredBlocks.empty()) return false; + uint32_t NumBlocks = CoveredBlocks.back(); + CoveredBlocks.pop_back(); + for (auto BB : CoveredBlocks) + if (BB >= NumBlocks) return false; + auto It = Functions.find(FunctionId); + auto &Counters = + It == Functions.end() + ? Functions.insert({FunctionId, Vector<uint32_t>(NumBlocks)}) + .first->second + : It->second; + + if (Counters.size() != NumBlocks) return false; // wrong number of blocks. + + Counters[0]++; + for (auto BB : CoveredBlocks) + Counters[BB]++; + } + return true; +} + +// Assign weights to each function. +// General principles: +// * any uncovered function gets weight 0. +// * a function with lots of uncovered blocks gets bigger weight. +// * a function with a less frequently executed code gets bigger weight. +Vector<double> BlockCoverage::FunctionWeights(size_t NumFunctions) const { + Vector<double> Res(NumFunctions); + for (auto It : Functions) { + auto FunctionID = It.first; + auto Counters = It.second; + assert(FunctionID < NumFunctions); + auto &Weight = Res[FunctionID]; + // Give higher weight if the function has a DFT. + Weight = FunctionsWithDFT.count(FunctionID) ? 1000. : 1; + // Give higher weight to functions with less frequently seen basic blocks. + Weight /= SmallestNonZeroCounter(Counters); + // Give higher weight to functions with the most uncovered basic blocks. + Weight *= NumberOfUncoveredBlocks(Counters) + 1; + } + return Res; +} + +int DataFlowTrace::ReadCoverage(const std::string &DirPath) { + Vector<SizedFile> Files; + int Res = GetSizedFilesFromDir(DirPath, &Files); + if (Res != 0) + return Res; + for (auto &SF : Files) { + auto Name = Basename(SF.File); + if (Name == kFunctionsTxt) continue; + if (!CorporaHashes.count(Name)) continue; + std::ifstream IF(SF.File); + Coverage.AppendCoverage(IF); + } + return 0; +} + +static void DFTStringAppendToVector(Vector<uint8_t> *DFT, + const std::string &DFTString) { + assert(DFT->size() == DFTString.size()); + for (size_t I = 0, Len = DFT->size(); I < Len; I++) + (*DFT)[I] = DFTString[I] == '1'; +} + +// converts a string of '0' and '1' into a Vector<uint8_t> +static Vector<uint8_t> DFTStringToVector(const std::string &DFTString) { + Vector<uint8_t> DFT(DFTString.size()); + DFTStringAppendToVector(&DFT, DFTString); + return DFT; +} + +static bool ParseError(const char *Err, const std::string &Line) { + Printf("DataFlowTrace: parse error: %s: Line: %s\n", Err, Line.c_str()); + return false; +} + +// TODO(metzman): replace std::string with std::string_view for +// better performance. Need to figure our how to use string_view on Windows. +static bool ParseDFTLine(const std::string &Line, size_t *FunctionNum, + std::string *DFTString) { + if (!Line.empty() && Line[0] != 'F') + return false; // Ignore coverage. + size_t SpacePos = Line.find(' '); + if (SpacePos == std::string::npos) + return ParseError("no space in the trace line", Line); + if (Line.empty() || Line[0] != 'F') + return ParseError("the trace line doesn't start with 'F'", Line); + *FunctionNum = std::atol(Line.c_str() + 1); + const char *Beg = Line.c_str() + SpacePos + 1; + const char *End = Line.c_str() + Line.size(); + assert(Beg < End); + size_t Len = End - Beg; + for (size_t I = 0; I < Len; I++) { + if (Beg[I] != '0' && Beg[I] != '1') + return ParseError("the trace should contain only 0 or 1", Line); + } + *DFTString = Beg; + return true; +} + +int DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction, + Vector<SizedFile> &CorporaFiles, Random &Rand) { + if (DirPath.empty()) return 0; + Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str()); + Vector<SizedFile> Files; + int Res = GetSizedFilesFromDir(DirPath, &Files); + if (Res != 0) + return Res; + std::string L; + size_t FocusFuncIdx = SIZE_MAX; + Vector<std::string> FunctionNames; + + // Collect the hashes of the corpus files. + for (auto &SF : CorporaFiles) + CorporaHashes.insert(Hash(FileToVector(SF.File))); + + // Read functions.txt + std::ifstream IF(DirPlusFile(DirPath, kFunctionsTxt)); + size_t NumFunctions = 0; + while (std::getline(IF, L, '\n')) { + FunctionNames.push_back(L); + NumFunctions++; + if (*FocusFunction == L) + FocusFuncIdx = NumFunctions - 1; + } + if (!NumFunctions) + return 0; + + if (*FocusFunction == "auto") { + // AUTOFOCUS works like this: + // * reads the coverage data from the DFT files. + // * assigns weights to functions based on coverage. + // * chooses a random function according to the weights. + Res = ReadCoverage(DirPath); + if (Res != 0) + return Res; + auto Weights = Coverage.FunctionWeights(NumFunctions); + Vector<double> Intervals(NumFunctions + 1); + std::iota(Intervals.begin(), Intervals.end(), 0); + auto Distribution = std::piecewise_constant_distribution<double>( + Intervals.begin(), Intervals.end(), Weights.begin()); + FocusFuncIdx = static_cast<size_t>(Distribution(Rand)); + *FocusFunction = FunctionNames[FocusFuncIdx]; + assert(FocusFuncIdx < NumFunctions); + Printf("INFO: AUTOFOCUS: %zd %s\n", FocusFuncIdx, + FunctionNames[FocusFuncIdx].c_str()); + for (size_t i = 0; i < NumFunctions; i++) { + if (!Weights[i]) continue; + Printf(" [%zd] W %g\tBB-tot %u\tBB-cov %u\tEntryFreq %u:\t%s\n", i, + Weights[i], Coverage.GetNumberOfBlocks(i), + Coverage.GetNumberOfCoveredBlocks(i), Coverage.GetCounter(i, 0), + FunctionNames[i].c_str()); + } + } + + if (!NumFunctions || FocusFuncIdx == SIZE_MAX || Files.size() <= 1) + return 0; + + // Read traces. + size_t NumTraceFiles = 0; + size_t NumTracesWithFocusFunction = 0; + for (auto &SF : Files) { + auto Name = Basename(SF.File); + if (Name == kFunctionsTxt) continue; + if (!CorporaHashes.count(Name)) continue; // not in the corpus. + NumTraceFiles++; + // Printf("=== %s\n", Name.c_str()); + std::ifstream IF(SF.File); + while (std::getline(IF, L, '\n')) { + size_t FunctionNum = 0; + std::string DFTString; + if (ParseDFTLine(L, &FunctionNum, &DFTString) && + FunctionNum == FocusFuncIdx) { + NumTracesWithFocusFunction++; + + if (FunctionNum >= NumFunctions) { + ParseError("N is greater than the number of functions", L); + return 0; + } + Traces[Name] = DFTStringToVector(DFTString); + // Print just a few small traces. + if (NumTracesWithFocusFunction <= 3 && DFTString.size() <= 16) + Printf("%s => |%s|\n", Name.c_str(), std::string(DFTString).c_str()); + break; // No need to parse the following lines. + } + } + } + Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, " + "%zd traces with focus function\n", + NumTraceFiles, NumFunctions, NumTracesWithFocusFunction); + return 0; +} + +int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath, + const Vector<SizedFile> &CorporaFiles) { + Printf("INFO: collecting data flow: bin: %s dir: %s files: %zd\n", + DFTBinary.c_str(), DirPath.c_str(), CorporaFiles.size()); + if (CorporaFiles.empty()) { + Printf("ERROR: can't collect data flow without corpus provided."); + return 1; + } + + static char DFSanEnv[] = "DFSAN_OPTIONS=warn_unimplemented=0"; + putenv(DFSanEnv); + MkDir(DirPath); + for (auto &F : CorporaFiles) { + // For every input F we need to collect the data flow and the coverage. + // Data flow collection may fail if we request too many DFSan tags at once. + // So, we start from requesting all tags in range [0,Size) and if that fails + // we then request tags in [0,Size/2) and [Size/2, Size), and so on. + // Function number => DFT. + auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File))); + std::unordered_map<size_t, Vector<uint8_t>> DFTMap; + std::unordered_set<std::string> Cov; + Command Cmd; + Cmd.addArgument(DFTBinary); + Cmd.addArgument(F.File); + Cmd.addArgument(OutPath); + Printf("CMD: %s\n", Cmd.toString().c_str()); + ExecuteCommand(Cmd); + } + // Write functions.txt if it's currently empty or doesn't exist. + auto FunctionsTxtPath = DirPlusFile(DirPath, kFunctionsTxt); + if (FileToString(FunctionsTxtPath).empty()) { + Command Cmd; + Cmd.addArgument(DFTBinary); + Cmd.setOutputFile(FunctionsTxtPath); + ExecuteCommand(Cmd); + } + return 0; +} + +} // namespace fuzzer diff --git a/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.h b/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.h new file mode 100644 index 0000000000..767bad24f1 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.h @@ -0,0 +1,135 @@ +//===- FuzzerDataFlowTrace.h - Internal header for the Fuzzer ---*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// fuzzer::DataFlowTrace; reads and handles a data-flow trace. +// +// A data flow trace is generated by e.g. dataflow/DataFlow.cpp +// and is stored on disk in a separate directory. +// +// The trace dir contains a file 'functions.txt' which lists function names, +// oner per line, e.g. +// ==> functions.txt <== +// Func2 +// LLVMFuzzerTestOneInput +// Func1 +// +// All other files in the dir are the traces, see dataflow/DataFlow.cpp. +// The name of the file is sha1 of the input used to generate the trace. +// +// Current status: +// the data is parsed and the summary is printed, but the data is not yet +// used in any other way. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_DATA_FLOW_TRACE +#define LLVM_FUZZER_DATA_FLOW_TRACE + +#include "FuzzerDefs.h" +#include "FuzzerIO.h" + +#include <unordered_map> +#include <unordered_set> +#include <vector> +#include <string> + +namespace fuzzer { + +int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath, + const Vector<SizedFile> &CorporaFiles); + +class BlockCoverage { + public: + bool AppendCoverage(std::istream &IN); + bool AppendCoverage(const std::string &S); + + size_t NumCoveredFunctions() const { return Functions.size(); } + + uint32_t GetCounter(size_t FunctionId, size_t BasicBlockId) { + auto It = Functions.find(FunctionId); + if (It == Functions.end()) return 0; + const auto &Counters = It->second; + if (BasicBlockId < Counters.size()) + return Counters[BasicBlockId]; + return 0; + } + + uint32_t GetNumberOfBlocks(size_t FunctionId) { + auto It = Functions.find(FunctionId); + if (It == Functions.end()) return 0; + const auto &Counters = It->second; + return Counters.size(); + } + + uint32_t GetNumberOfCoveredBlocks(size_t FunctionId) { + auto It = Functions.find(FunctionId); + if (It == Functions.end()) return 0; + const auto &Counters = It->second; + uint32_t Result = 0; + for (auto Cnt: Counters) + if (Cnt) + Result++; + return Result; + } + + Vector<double> FunctionWeights(size_t NumFunctions) const; + void clear() { Functions.clear(); } + + private: + + typedef Vector<uint32_t> CoverageVector; + + uint32_t NumberOfCoveredBlocks(const CoverageVector &Counters) const { + uint32_t Res = 0; + for (auto Cnt : Counters) + if (Cnt) + Res++; + return Res; + } + + uint32_t NumberOfUncoveredBlocks(const CoverageVector &Counters) const { + return Counters.size() - NumberOfCoveredBlocks(Counters); + } + + uint32_t SmallestNonZeroCounter(const CoverageVector &Counters) const { + assert(!Counters.empty()); + uint32_t Res = Counters[0]; + for (auto Cnt : Counters) + if (Cnt) + Res = Min(Res, Cnt); + assert(Res); + return Res; + } + + // Function ID => vector of counters. + // Each counter represents how many input files trigger the given basic block. + std::unordered_map<size_t, CoverageVector> Functions; + // Functions that have DFT entry. + std::unordered_set<size_t> FunctionsWithDFT; +}; + +class DataFlowTrace { + public: + int ReadCoverage(const std::string &DirPath); + int Init(const std::string &DirPath, std::string *FocusFunction, + Vector<SizedFile> &CorporaFiles, Random &Rand); + void Clear() { Traces.clear(); } + const Vector<uint8_t> *Get(const std::string &InputSha1) const { + auto It = Traces.find(InputSha1); + if (It != Traces.end()) + return &It->second; + return nullptr; + } + + private: + // Input's sha1 => DFT for the FocusFunction. + std::unordered_map<std::string, Vector<uint8_t> > Traces; + BlockCoverage Coverage; + std::unordered_set<std::string> CorporaHashes; +}; +} // namespace fuzzer + +#endif // LLVM_FUZZER_DATA_FLOW_TRACE diff --git a/tools/fuzzing/libfuzzer/FuzzerDefs.h b/tools/fuzzing/libfuzzer/FuzzerDefs.h new file mode 100644 index 0000000000..3952ac51ef --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerDefs.h @@ -0,0 +1,75 @@ +//===- FuzzerDefs.h - Internal header for the Fuzzer ------------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Basic definitions. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_DEFS_H +#define LLVM_FUZZER_DEFS_H + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <memory> +#include <set> +#include <string> +#include <vector> + + +namespace fuzzer { + +template <class T> T Min(T a, T b) { return a < b ? a : b; } +template <class T> T Max(T a, T b) { return a > b ? a : b; } + +class Random; +class Dictionary; +class DictionaryEntry; +class MutationDispatcher; +struct FuzzingOptions; +class InputCorpus; +struct InputInfo; +struct ExternalFunctions; + +// Global interface to functions that may or may not be available. +extern ExternalFunctions *EF; + +// We are using a custom allocator to give a different symbol name to STL +// containers in order to avoid ODR violations. +template<typename T> + class fuzzer_allocator: public std::allocator<T> { + public: + fuzzer_allocator() = default; + + template<class U> + explicit fuzzer_allocator(const fuzzer_allocator<U>&) {} + + template<class Other> + struct rebind { typedef fuzzer_allocator<Other> other; }; + }; + +template<typename T> +using Vector = std::vector<T, fuzzer_allocator<T>>; + +template<typename T> +using Set = std::set<T, std::less<T>, fuzzer_allocator<T>>; + +typedef Vector<uint8_t> Unit; +typedef Vector<Unit> UnitVector; +typedef int (*UserCallback)(const uint8_t *Data, size_t Size); + +int FuzzerDriver(int *argc, char ***argv, UserCallback Callback); + +uint8_t *ExtraCountersBegin(); +uint8_t *ExtraCountersEnd(); +void ClearExtraCounters(); + +extern bool RunningUserCallback; + +} // namespace fuzzer + +#endif // LLVM_FUZZER_DEFS_H diff --git a/tools/fuzzing/libfuzzer/FuzzerDictionary.h b/tools/fuzzing/libfuzzer/FuzzerDictionary.h new file mode 100644 index 0000000000..301c5d9afe --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerDictionary.h @@ -0,0 +1,118 @@ +//===- FuzzerDictionary.h - Internal header for the Fuzzer ------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// fuzzer::Dictionary +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_DICTIONARY_H +#define LLVM_FUZZER_DICTIONARY_H + +#include "FuzzerDefs.h" +#include "FuzzerIO.h" +#include "FuzzerUtil.h" +#include <algorithm> +#include <limits> + +namespace fuzzer { +// A simple POD sized array of bytes. +template <size_t kMaxSizeT> class FixedWord { +public: + static const size_t kMaxSize = kMaxSizeT; + FixedWord() {} + FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); } + + void Set(const uint8_t *B, uint8_t S) { + assert(S <= kMaxSize); + memcpy(Data, B, S); + Size = S; + } + + bool operator==(const FixedWord<kMaxSize> &w) const { + return Size == w.Size && 0 == memcmp(Data, w.Data, Size); + } + + static size_t GetMaxSize() { return kMaxSize; } + const uint8_t *data() const { return Data; } + uint8_t size() const { return Size; } + +private: + uint8_t Size = 0; + uint8_t Data[kMaxSize]; +}; + +typedef FixedWord<64> Word; + +class DictionaryEntry { + public: + DictionaryEntry() {} + DictionaryEntry(Word W) : W(W) {} + DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {} + const Word &GetW() const { return W; } + + bool HasPositionHint() const { return PositionHint != std::numeric_limits<size_t>::max(); } + size_t GetPositionHint() const { + assert(HasPositionHint()); + return PositionHint; + } + void IncUseCount() { UseCount++; } + void IncSuccessCount() { SuccessCount++; } + size_t GetUseCount() const { return UseCount; } + size_t GetSuccessCount() const {return SuccessCount; } + + void Print(const char *PrintAfter = "\n") { + PrintASCII(W.data(), W.size()); + if (HasPositionHint()) + Printf("@%zd", GetPositionHint()); + Printf("%s", PrintAfter); + } + +private: + Word W; + size_t PositionHint = std::numeric_limits<size_t>::max(); + size_t UseCount = 0; + size_t SuccessCount = 0; +}; + +class Dictionary { + public: + static const size_t kMaxDictSize = 1 << 14; + + bool ContainsWord(const Word &W) const { + return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { + return DE.GetW() == W; + }); + } + const DictionaryEntry *begin() const { return &DE[0]; } + const DictionaryEntry *end() const { return begin() + Size; } + DictionaryEntry & operator[] (size_t Idx) { + assert(Idx < Size); + return DE[Idx]; + } + void push_back(DictionaryEntry DE) { + if (Size < kMaxDictSize) + this->DE[Size++] = DE; + } + void clear() { Size = 0; } + bool empty() const { return Size == 0; } + size_t size() const { return Size; } + +private: + DictionaryEntry DE[kMaxDictSize]; + size_t Size = 0; +}; + +// Parses one dictionary entry. +// If successful, write the enty to Unit and returns true, +// otherwise returns false. +bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); +// Parses the dictionary file, fills Units, returns true iff all lines +// were parsed successfully. +bool ParseDictionaryFile(const std::string &Text, Vector<Unit> *Units); + +} // namespace fuzzer + +#endif // LLVM_FUZZER_DICTIONARY_H diff --git a/tools/fuzzing/libfuzzer/FuzzerDriver.cpp b/tools/fuzzing/libfuzzer/FuzzerDriver.cpp new file mode 100644 index 0000000000..bedad16efa --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerDriver.cpp @@ -0,0 +1,888 @@ +//===- FuzzerDriver.cpp - FuzzerDriver function and flags -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// FuzzerDriver and flag parsing. +//===----------------------------------------------------------------------===// + +#include "FuzzerCommand.h" +#include "FuzzerCorpus.h" +#include "FuzzerFork.h" +#include "FuzzerIO.h" +#include "FuzzerInterface.h" +#include "FuzzerInternal.h" +#include "FuzzerMerge.h" +#include "FuzzerMutate.h" +#include "FuzzerPlatform.h" +#include "FuzzerRandom.h" +#include "FuzzerTracePC.h" +#include <algorithm> +#include <atomic> +#include <chrono> +#include <cstdlib> +#include <cstring> +#include <mutex> +#include <string> +#include <thread> +#include <fstream> + +// This function should be present in the libFuzzer so that the client +// binary can test for its existence. +#if LIBFUZZER_MSVC +extern "C" void __libfuzzer_is_present() {} +#if defined(_M_IX86) || defined(__i386__) +#pragma comment(linker, "/include:___libfuzzer_is_present") +#else +#pragma comment(linker, "/include:__libfuzzer_is_present") +#endif +#else +extern "C" __attribute__((used)) void __libfuzzer_is_present() {} +#endif // LIBFUZZER_MSVC + +namespace fuzzer { + +// Program arguments. +struct FlagDescription { + const char *Name; + const char *Description; + int Default; + int *IntFlag; + const char **StrFlag; + unsigned int *UIntFlag; +}; + +struct { +#define FUZZER_DEPRECATED_FLAG(Name) +#define FUZZER_FLAG_INT(Name, Default, Description) int Name; +#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) unsigned int Name; +#define FUZZER_FLAG_STRING(Name, Description) const char *Name; +#include "FuzzerFlags.def" +#undef FUZZER_DEPRECATED_FLAG +#undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_UNSIGNED +#undef FUZZER_FLAG_STRING +} Flags; + +static const FlagDescription FlagDescriptions [] { +#define FUZZER_DEPRECATED_FLAG(Name) \ + {#Name, "Deprecated; don't use", 0, nullptr, nullptr, nullptr}, +#define FUZZER_FLAG_INT(Name, Default, Description) \ + {#Name, Description, Default, &Flags.Name, nullptr, nullptr}, +#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) \ + {#Name, Description, static_cast<int>(Default), \ + nullptr, nullptr, &Flags.Name}, +#define FUZZER_FLAG_STRING(Name, Description) \ + {#Name, Description, 0, nullptr, &Flags.Name, nullptr}, +#include "FuzzerFlags.def" +#undef FUZZER_DEPRECATED_FLAG +#undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_UNSIGNED +#undef FUZZER_FLAG_STRING +}; + +static const size_t kNumFlags = + sizeof(FlagDescriptions) / sizeof(FlagDescriptions[0]); + +static Vector<std::string> *Inputs; +static std::string *ProgName; + +static void PrintHelp() { + Printf("Usage:\n"); + auto Prog = ProgName->c_str(); + Printf("\nTo run fuzzing pass 0 or more directories.\n"); + Printf("%s [-flag1=val1 [-flag2=val2 ...] ] [dir1 [dir2 ...] ]\n", Prog); + + Printf("\nTo run individual tests without fuzzing pass 1 or more files:\n"); + Printf("%s [-flag1=val1 [-flag2=val2 ...] ] file1 [file2 ...]\n", Prog); + + Printf("\nFlags: (strictly in form -flag=value)\n"); + size_t MaxFlagLen = 0; + for (size_t F = 0; F < kNumFlags; F++) + MaxFlagLen = std::max(strlen(FlagDescriptions[F].Name), MaxFlagLen); + + for (size_t F = 0; F < kNumFlags; F++) { + const auto &D = FlagDescriptions[F]; + if (strstr(D.Description, "internal flag") == D.Description) continue; + Printf(" %s", D.Name); + for (size_t i = 0, n = MaxFlagLen - strlen(D.Name); i < n; i++) + Printf(" "); + Printf("\t"); + Printf("%d\t%s\n", D.Default, D.Description); + } + Printf("\nFlags starting with '--' will be ignored and " + "will be passed verbatim to subprocesses.\n"); +} + +static const char *FlagValue(const char *Param, const char *Name) { + size_t Len = strlen(Name); + if (Param[0] == '-' && strstr(Param + 1, Name) == Param + 1 && + Param[Len + 1] == '=') + return &Param[Len + 2]; + return nullptr; +} + +// Avoid calling stol as it triggers a bug in clang/glibc build. +static long MyStol(const char *Str) { + long Res = 0; + long Sign = 1; + if (*Str == '-') { + Str++; + Sign = -1; + } + for (size_t i = 0; Str[i]; i++) { + char Ch = Str[i]; + if (Ch < '0' || Ch > '9') + return Res; + Res = Res * 10 + (Ch - '0'); + } + return Res * Sign; +} + +static bool ParseOneFlag(const char *Param) { + if (Param[0] != '-') return false; + if (Param[1] == '-') { + static bool PrintedWarning = false; + if (!PrintedWarning) { + PrintedWarning = true; + Printf("INFO: libFuzzer ignores flags that start with '--'\n"); + } + for (size_t F = 0; F < kNumFlags; F++) + if (FlagValue(Param + 1, FlagDescriptions[F].Name)) + Printf("WARNING: did you mean '%s' (single dash)?\n", Param + 1); + return true; + } + for (size_t F = 0; F < kNumFlags; F++) { + const char *Name = FlagDescriptions[F].Name; + const char *Str = FlagValue(Param, Name); + if (Str) { + if (FlagDescriptions[F].IntFlag) { + int Val = MyStol(Str); + *FlagDescriptions[F].IntFlag = Val; + if (Flags.verbosity >= 2) + Printf("Flag: %s %d\n", Name, Val); + return true; + } else if (FlagDescriptions[F].UIntFlag) { + unsigned int Val = std::stoul(Str); + *FlagDescriptions[F].UIntFlag = Val; + if (Flags.verbosity >= 2) + Printf("Flag: %s %u\n", Name, Val); + return true; + } else if (FlagDescriptions[F].StrFlag) { + *FlagDescriptions[F].StrFlag = Str; + if (Flags.verbosity >= 2) + Printf("Flag: %s %s\n", Name, Str); + return true; + } else { // Deprecated flag. + Printf("Flag: %s: deprecated, don't use\n", Name); + return true; + } + } + } + Printf("\n\nWARNING: unrecognized flag '%s'; " + "use -help=1 to list all flags\n\n", Param); + return true; +} + +// We don't use any library to minimize dependencies. +static void ParseFlags(const Vector<std::string> &Args, + const ExternalFunctions *EF) { + for (size_t F = 0; F < kNumFlags; F++) { + if (FlagDescriptions[F].IntFlag) + *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default; + if (FlagDescriptions[F].UIntFlag) + *FlagDescriptions[F].UIntFlag = + static_cast<unsigned int>(FlagDescriptions[F].Default); + if (FlagDescriptions[F].StrFlag) + *FlagDescriptions[F].StrFlag = nullptr; + } + + // Disable len_control by default, if LLVMFuzzerCustomMutator is used. + if (EF->LLVMFuzzerCustomMutator) { + Flags.len_control = 0; + Printf("INFO: found LLVMFuzzerCustomMutator (%p). " + "Disabling -len_control by default.\n", EF->LLVMFuzzerCustomMutator); + } + + Inputs = new Vector<std::string>; + for (size_t A = 1; A < Args.size(); A++) { + if (ParseOneFlag(Args[A].c_str())) { + if (Flags.ignore_remaining_args) + break; + continue; + } + Inputs->push_back(Args[A]); + } +} + +static std::mutex Mu; + +static void PulseThread() { + while (true) { + SleepSeconds(600); + std::lock_guard<std::mutex> Lock(Mu); + Printf("pulse...\n"); + } +} + +static void WorkerThread(const Command &BaseCmd, std::atomic<unsigned> *Counter, + unsigned NumJobs, std::atomic<bool> *HasErrors) { + while (true) { + unsigned C = (*Counter)++; + if (C >= NumJobs) break; + std::string Log = "fuzz-" + std::to_string(C) + ".log"; + Command Cmd(BaseCmd); + Cmd.setOutputFile(Log); + Cmd.combineOutAndErr(); + if (Flags.verbosity) { + std::string CommandLine = Cmd.toString(); + Printf("%s\n", CommandLine.c_str()); + } + int ExitCode = ExecuteCommand(Cmd); + if (ExitCode != 0) + *HasErrors = true; + std::lock_guard<std::mutex> Lock(Mu); + Printf("================== Job %u exited with exit code %d ============\n", + C, ExitCode); + fuzzer::CopyFileToErr(Log); + } +} + +std::string CloneArgsWithoutX(const Vector<std::string> &Args, + const char *X1, const char *X2) { + std::string Cmd; + for (auto &S : Args) { + if (FlagValue(S.c_str(), X1) || FlagValue(S.c_str(), X2)) + continue; + Cmd += S + " "; + } + return Cmd; +} + +static int RunInMultipleProcesses(const Vector<std::string> &Args, + unsigned NumWorkers, unsigned NumJobs) { + std::atomic<unsigned> Counter(0); + std::atomic<bool> HasErrors(false); + Command Cmd(Args); + Cmd.removeFlag("jobs"); + Cmd.removeFlag("workers"); + Vector<std::thread> V; + std::thread Pulse(PulseThread); + Pulse.detach(); + for (unsigned i = 0; i < NumWorkers; i++) + V.push_back(std::thread(WorkerThread, std::ref(Cmd), &Counter, NumJobs, &HasErrors)); + for (auto &T : V) + T.join(); + return HasErrors ? 1 : 0; +} + +static void RssThread(Fuzzer *F, size_t RssLimitMb) { + while (true) { + SleepSeconds(1); + size_t Peak = GetPeakRSSMb(); + if (Peak > RssLimitMb) + F->RssLimitCallback(); + } +} + +static void StartRssThread(Fuzzer *F, size_t RssLimitMb) { + if (!RssLimitMb) + return; + std::thread T(RssThread, F, RssLimitMb); + T.detach(); +} + +int RunOneTest(Fuzzer *F, const char *InputFilePath, size_t MaxLen) { + Unit U = FileToVector(InputFilePath); + if (MaxLen && MaxLen < U.size()) + U.resize(MaxLen); + F->ExecuteCallback(U.data(), U.size()); + F->TryDetectingAMemoryLeak(U.data(), U.size(), true); + return 0; +} + +static bool AllInputsAreFiles() { + if (Inputs->empty()) return false; + for (auto &Path : *Inputs) + if (!IsFile(Path)) + return false; + return true; +} + +static std::string GetDedupTokenFromCmdOutput(const std::string &S) { + auto Beg = S.find("DEDUP_TOKEN:"); + if (Beg == std::string::npos) + return ""; + auto End = S.find('\n', Beg); + if (End == std::string::npos) + return ""; + return S.substr(Beg, End - Beg); +} + +int CleanseCrashInput(const Vector<std::string> &Args, + const FuzzingOptions &Options) { + if (Inputs->size() != 1 || !Flags.exact_artifact_path) { + Printf("ERROR: -cleanse_crash should be given one input file and" + " -exact_artifact_path\n"); + return 1; + } + std::string InputFilePath = Inputs->at(0); + std::string OutputFilePath = Flags.exact_artifact_path; + Command Cmd(Args); + Cmd.removeFlag("cleanse_crash"); + + assert(Cmd.hasArgument(InputFilePath)); + Cmd.removeArgument(InputFilePath); + + auto TmpFilePath = TempPath("CleanseCrashInput", ".repro"); + Cmd.addArgument(TmpFilePath); + Cmd.setOutputFile(getDevNull()); + Cmd.combineOutAndErr(); + + std::string CurrentFilePath = InputFilePath; + auto U = FileToVector(CurrentFilePath); + size_t Size = U.size(); + + const Vector<uint8_t> ReplacementBytes = {' ', 0xff}; + for (int NumAttempts = 0; NumAttempts < 5; NumAttempts++) { + bool Changed = false; + for (size_t Idx = 0; Idx < Size; Idx++) { + Printf("CLEANSE[%d]: Trying to replace byte %zd of %zd\n", NumAttempts, + Idx, Size); + uint8_t OriginalByte = U[Idx]; + if (ReplacementBytes.end() != std::find(ReplacementBytes.begin(), + ReplacementBytes.end(), + OriginalByte)) + continue; + for (auto NewByte : ReplacementBytes) { + U[Idx] = NewByte; + WriteToFile(U, TmpFilePath); + auto ExitCode = ExecuteCommand(Cmd); + RemoveFile(TmpFilePath); + if (!ExitCode) { + U[Idx] = OriginalByte; + } else { + Changed = true; + Printf("CLEANSE: Replaced byte %zd with 0x%x\n", Idx, NewByte); + WriteToFile(U, OutputFilePath); + break; + } + } + } + if (!Changed) break; + } + return 0; +} + +int MinimizeCrashInput(const Vector<std::string> &Args, + const FuzzingOptions &Options) { + if (Inputs->size() != 1) { + Printf("ERROR: -minimize_crash should be given one input file\n"); + return 1; + } + std::string InputFilePath = Inputs->at(0); + Command BaseCmd(Args); + BaseCmd.removeFlag("minimize_crash"); + BaseCmd.removeFlag("exact_artifact_path"); + assert(BaseCmd.hasArgument(InputFilePath)); + BaseCmd.removeArgument(InputFilePath); + if (Flags.runs <= 0 && Flags.max_total_time == 0) { + Printf("INFO: you need to specify -runs=N or " + "-max_total_time=N with -minimize_crash=1\n" + "INFO: defaulting to -max_total_time=600\n"); + BaseCmd.addFlag("max_total_time", "600"); + } + + BaseCmd.combineOutAndErr(); + + std::string CurrentFilePath = InputFilePath; + while (true) { + Unit U = FileToVector(CurrentFilePath); + Printf("CRASH_MIN: minimizing crash input: '%s' (%zd bytes)\n", + CurrentFilePath.c_str(), U.size()); + + Command Cmd(BaseCmd); + Cmd.addArgument(CurrentFilePath); + + Printf("CRASH_MIN: executing: %s\n", Cmd.toString().c_str()); + std::string CmdOutput; + bool Success = ExecuteCommand(Cmd, &CmdOutput); + if (Success) { + Printf("ERROR: the input %s did not crash\n", CurrentFilePath.c_str()); + return 1; + } + Printf("CRASH_MIN: '%s' (%zd bytes) caused a crash. Will try to minimize " + "it further\n", + CurrentFilePath.c_str(), U.size()); + auto DedupToken1 = GetDedupTokenFromCmdOutput(CmdOutput); + if (!DedupToken1.empty()) + Printf("CRASH_MIN: DedupToken1: %s\n", DedupToken1.c_str()); + + std::string ArtifactPath = + Flags.exact_artifact_path + ? Flags.exact_artifact_path + : Options.ArtifactPrefix + "minimized-from-" + Hash(U); + Cmd.addFlag("minimize_crash_internal_step", "1"); + Cmd.addFlag("exact_artifact_path", ArtifactPath); + Printf("CRASH_MIN: executing: %s\n", Cmd.toString().c_str()); + CmdOutput.clear(); + Success = ExecuteCommand(Cmd, &CmdOutput); + Printf("%s", CmdOutput.c_str()); + if (Success) { + if (Flags.exact_artifact_path) { + CurrentFilePath = Flags.exact_artifact_path; + WriteToFile(U, CurrentFilePath); + } + Printf("CRASH_MIN: failed to minimize beyond %s (%d bytes), exiting\n", + CurrentFilePath.c_str(), U.size()); + break; + } + auto DedupToken2 = GetDedupTokenFromCmdOutput(CmdOutput); + if (!DedupToken2.empty()) + Printf("CRASH_MIN: DedupToken2: %s\n", DedupToken2.c_str()); + + if (DedupToken1 != DedupToken2) { + if (Flags.exact_artifact_path) { + CurrentFilePath = Flags.exact_artifact_path; + WriteToFile(U, CurrentFilePath); + } + Printf("CRASH_MIN: mismatch in dedup tokens" + " (looks like a different bug). Won't minimize further\n"); + break; + } + + CurrentFilePath = ArtifactPath; + Printf("*********************************\n"); + } + return 0; +} + +int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) { + assert(Inputs->size() == 1); + std::string InputFilePath = Inputs->at(0); + Unit U = FileToVector(InputFilePath); + Printf("INFO: Starting MinimizeCrashInputInternalStep: %zd\n", U.size()); + if (U.size() < 2) { + Printf("INFO: The input is small enough, exiting\n"); + return 0; + } + F->SetMaxInputLen(U.size()); + F->SetMaxMutationLen(U.size() - 1); + F->MinimizeCrashLoop(U); + Printf("INFO: Done MinimizeCrashInputInternalStep, no crashes found\n"); + return 0; +} + +int Merge(Fuzzer *F, FuzzingOptions &Options, const Vector<std::string> &Args, + const Vector<std::string> &Corpora, const char *CFPathOrNull) { + if (Corpora.size() < 2) { + Printf("INFO: Merge requires two or more corpus dirs\n"); + return 0; + } + + Vector<SizedFile> OldCorpus, NewCorpus; + int Res = GetSizedFilesFromDir(Corpora[0], &OldCorpus); + if (Res != 0) + return Res; + for (size_t i = 1; i < Corpora.size(); i++) { + Res = GetSizedFilesFromDir(Corpora[i], &NewCorpus); + if (Res != 0) + return Res; + } + std::sort(OldCorpus.begin(), OldCorpus.end()); + std::sort(NewCorpus.begin(), NewCorpus.end()); + + std::string CFPath = CFPathOrNull ? CFPathOrNull : TempPath("Merge", ".txt"); + Vector<std::string> NewFiles; + Set<uint32_t> NewFeatures, NewCov; + Res = CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, {}, &NewFeatures, + {}, &NewCov, CFPath, true); + if (Res != 0) + return Res; + + if (F->isGracefulExitRequested()) + return 0; + for (auto &Path : NewFiles) + F->WriteToOutputCorpus(FileToVector(Path, Options.MaxLen)); + // We are done, delete the control file if it was a temporary one. + if (!Flags.merge_control_file) + RemoveFile(CFPath); + + return 0; +} + +int AnalyzeDictionary(Fuzzer *F, const Vector<Unit>& Dict, + UnitVector& Corpus) { + Printf("Started dictionary minimization (up to %d tests)\n", + Dict.size() * Corpus.size() * 2); + + // Scores and usage count for each dictionary unit. + Vector<int> Scores(Dict.size()); + Vector<int> Usages(Dict.size()); + + Vector<size_t> InitialFeatures; + Vector<size_t> ModifiedFeatures; + for (auto &C : Corpus) { + // Get coverage for the testcase without modifications. + F->ExecuteCallback(C.data(), C.size()); + InitialFeatures.clear(); + TPC.CollectFeatures([&](size_t Feature) { + InitialFeatures.push_back(Feature); + }); + + for (size_t i = 0; i < Dict.size(); ++i) { + Vector<uint8_t> Data = C; + auto StartPos = std::search(Data.begin(), Data.end(), + Dict[i].begin(), Dict[i].end()); + // Skip dictionary unit, if the testcase does not contain it. + if (StartPos == Data.end()) + continue; + + ++Usages[i]; + while (StartPos != Data.end()) { + // Replace all occurrences of dictionary unit in the testcase. + auto EndPos = StartPos + Dict[i].size(); + for (auto It = StartPos; It != EndPos; ++It) + *It ^= 0xFF; + + StartPos = std::search(EndPos, Data.end(), + Dict[i].begin(), Dict[i].end()); + } + + // Get coverage for testcase with masked occurrences of dictionary unit. + F->ExecuteCallback(Data.data(), Data.size()); + ModifiedFeatures.clear(); + TPC.CollectFeatures([&](size_t Feature) { + ModifiedFeatures.push_back(Feature); + }); + + if (InitialFeatures == ModifiedFeatures) + --Scores[i]; + else + Scores[i] += 2; + } + } + + Printf("###### Useless dictionary elements. ######\n"); + for (size_t i = 0; i < Dict.size(); ++i) { + // Dictionary units with positive score are treated as useful ones. + if (Scores[i] > 0) + continue; + + Printf("\""); + PrintASCII(Dict[i].data(), Dict[i].size(), "\""); + Printf(" # Score: %d, Used: %d\n", Scores[i], Usages[i]); + } + Printf("###### End of useless dictionary elements. ######\n"); + return 0; +} + +int ParseSeedInuts(const char *seed_inputs, Vector<std::string> &Files) { + // Parse -seed_inputs=file1,file2,... or -seed_inputs=@seed_inputs_file + if (!seed_inputs) return 0; + std::string SeedInputs; + if (Flags.seed_inputs[0] == '@') + SeedInputs = FileToString(Flags.seed_inputs + 1); // File contains list. + else + SeedInputs = Flags.seed_inputs; // seed_inputs contains the list. + if (SeedInputs.empty()) { + Printf("seed_inputs is empty or @file does not exist.\n"); + return 1; + } + // Parse SeedInputs. + size_t comma_pos = 0; + while ((comma_pos = SeedInputs.find_last_of(',')) != std::string::npos) { + Files.push_back(SeedInputs.substr(comma_pos + 1)); + SeedInputs = SeedInputs.substr(0, comma_pos); + } + Files.push_back(SeedInputs); + return 0; +} + +static Vector<SizedFile> ReadCorpora(const Vector<std::string> &CorpusDirs, + const Vector<std::string> &ExtraSeedFiles) { + Vector<SizedFile> SizedFiles; + size_t LastNumFiles = 0; + for (auto &Dir : CorpusDirs) { + GetSizedFilesFromDir(Dir, &SizedFiles); + Printf("INFO: % 8zd files found in %s\n", SizedFiles.size() - LastNumFiles, + Dir.c_str()); + LastNumFiles = SizedFiles.size(); + } + for (auto &File : ExtraSeedFiles) + if (auto Size = FileSize(File)) + SizedFiles.push_back({File, Size}); + return SizedFiles; +} + +int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { + using namespace fuzzer; + assert(argc && argv && "Argument pointers cannot be nullptr"); + std::string Argv0((*argv)[0]); + if (!EF) + EF = new ExternalFunctions(); + if (EF->LLVMFuzzerInitialize) + EF->LLVMFuzzerInitialize(argc, argv); + if (EF->__msan_scoped_disable_interceptor_checks) + EF->__msan_scoped_disable_interceptor_checks(); + const Vector<std::string> Args(*argv, *argv + *argc); + assert(!Args.empty()); + ProgName = new std::string(Args[0]); + if (Argv0 != *ProgName) { + Printf("ERROR: argv[0] has been modified in LLVMFuzzerInitialize\n"); + return 1; + } + ParseFlags(Args, EF); + if (Flags.help) { + PrintHelp(); + return 0; + } + + if (Flags.close_fd_mask & 2) + DupAndCloseStderr(); + if (Flags.close_fd_mask & 1) + CloseStdout(); + + if (Flags.jobs > 0 && Flags.workers == 0) { + Flags.workers = std::min(NumberOfCpuCores() / 2, Flags.jobs); + if (Flags.workers > 1) + Printf("Running %u workers\n", Flags.workers); + } + + if (Flags.workers > 0 && Flags.jobs > 0) + return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs); + + FuzzingOptions Options; + Options.Verbosity = Flags.verbosity; + Options.MaxLen = Flags.max_len; + Options.LenControl = Flags.len_control; + Options.UnitTimeoutSec = Flags.timeout; + Options.ErrorExitCode = Flags.error_exitcode; + Options.TimeoutExitCode = Flags.timeout_exitcode; + Options.IgnoreTimeouts = Flags.ignore_timeouts; + Options.IgnoreOOMs = Flags.ignore_ooms; + Options.IgnoreCrashes = Flags.ignore_crashes; + Options.MaxTotalTimeSec = Flags.max_total_time; + Options.DoCrossOver = Flags.cross_over; + Options.MutateDepth = Flags.mutate_depth; + Options.ReduceDepth = Flags.reduce_depth; + Options.UseCounters = Flags.use_counters; + Options.UseMemmem = Flags.use_memmem; + Options.UseCmp = Flags.use_cmp; + Options.UseValueProfile = Flags.use_value_profile; + Options.Shrink = Flags.shrink; + Options.ReduceInputs = Flags.reduce_inputs; + Options.ShuffleAtStartUp = Flags.shuffle; + Options.PreferSmall = Flags.prefer_small; + Options.ReloadIntervalSec = Flags.reload; + Options.OnlyASCII = Flags.only_ascii; + Options.DetectLeaks = Flags.detect_leaks; + Options.PurgeAllocatorIntervalSec = Flags.purge_allocator_interval; + Options.TraceMalloc = Flags.trace_malloc; + Options.RssLimitMb = Flags.rss_limit_mb; + Options.MallocLimitMb = Flags.malloc_limit_mb; + if (!Options.MallocLimitMb) + Options.MallocLimitMb = Options.RssLimitMb; + if (Flags.runs >= 0) + Options.MaxNumberOfRuns = Flags.runs; + if (!Inputs->empty() && !Flags.minimize_crash_internal_step) + Options.OutputCorpus = (*Inputs)[0]; + Options.ReportSlowUnits = Flags.report_slow_units; + if (Flags.artifact_prefix) + Options.ArtifactPrefix = Flags.artifact_prefix; + if (Flags.exact_artifact_path) + Options.ExactArtifactPath = Flags.exact_artifact_path; + Vector<Unit> Dictionary; + if (Flags.dict) + if (!ParseDictionaryFile(FileToString(Flags.dict), &Dictionary)) + return 1; + if (Flags.verbosity > 0 && !Dictionary.empty()) + Printf("Dictionary: %zd entries\n", Dictionary.size()); + bool RunIndividualFiles = AllInputsAreFiles(); + Options.SaveArtifacts = + !RunIndividualFiles || Flags.minimize_crash_internal_step; + Options.PrintNewCovPcs = Flags.print_pcs; + Options.PrintNewCovFuncs = Flags.print_funcs; + Options.PrintFinalStats = Flags.print_final_stats; + Options.PrintCorpusStats = Flags.print_corpus_stats; + Options.PrintCoverage = Flags.print_coverage; + if (Flags.exit_on_src_pos) + Options.ExitOnSrcPos = Flags.exit_on_src_pos; + if (Flags.exit_on_item) + Options.ExitOnItem = Flags.exit_on_item; + if (Flags.focus_function) + Options.FocusFunction = Flags.focus_function; + if (Flags.data_flow_trace) + Options.DataFlowTrace = Flags.data_flow_trace; + if (Flags.features_dir) + Options.FeaturesDir = Flags.features_dir; + if (Flags.collect_data_flow) + Options.CollectDataFlow = Flags.collect_data_flow; + if (Flags.stop_file) + Options.StopFile = Flags.stop_file; + Options.Entropic = Flags.entropic; + Options.EntropicFeatureFrequencyThreshold = + (size_t)Flags.entropic_feature_frequency_threshold; + Options.EntropicNumberOfRarestFeatures = + (size_t)Flags.entropic_number_of_rarest_features; + if (Options.Entropic) { + if (!Options.FocusFunction.empty()) { + Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot " + "be used together.\n"); + return 1; + } + Printf("INFO: Running with entropic power schedule (0x%X, %d).\n", + Options.EntropicFeatureFrequencyThreshold, + Options.EntropicNumberOfRarestFeatures); + } + struct EntropicOptions Entropic; + Entropic.Enabled = Options.Entropic; + Entropic.FeatureFrequencyThreshold = + Options.EntropicFeatureFrequencyThreshold; + Entropic.NumberOfRarestFeatures = Options.EntropicNumberOfRarestFeatures; + + unsigned Seed = Flags.seed; + // Initialize Seed. + if (Seed == 0) + Seed = + std::chrono::system_clock::now().time_since_epoch().count() + GetPid(); + if (Flags.verbosity) + Printf("INFO: Seed: %u\n", Seed); + + if (Flags.collect_data_flow && !Flags.fork && !Flags.merge) { + if (RunIndividualFiles) + return CollectDataFlow(Flags.collect_data_flow, Flags.data_flow_trace, + ReadCorpora({}, *Inputs)); + else + return CollectDataFlow(Flags.collect_data_flow, Flags.data_flow_trace, + ReadCorpora(*Inputs, {})); + } + + Random Rand(Seed); + auto *MD = new MutationDispatcher(Rand, Options); + auto *Corpus = new InputCorpus(Options.OutputCorpus, Entropic); + auto *F = new Fuzzer(Callback, *Corpus, *MD, Options); + + for (auto &U: Dictionary) + if (U.size() <= Word::GetMaxSize()) + MD->AddWordToManualDictionary(Word(U.data(), U.size())); + + // Threads are only supported by Chrome. Don't use them with emscripten + // for now. +#if !LIBFUZZER_EMSCRIPTEN + StartRssThread(F, Flags.rss_limit_mb); +#endif // LIBFUZZER_EMSCRIPTEN + + Options.HandleAbrt = Flags.handle_abrt; + Options.HandleBus = Flags.handle_bus; + Options.HandleFpe = Flags.handle_fpe; + Options.HandleIll = Flags.handle_ill; + Options.HandleInt = Flags.handle_int; + Options.HandleSegv = Flags.handle_segv; + Options.HandleTerm = Flags.handle_term; + Options.HandleXfsz = Flags.handle_xfsz; + Options.HandleUsr1 = Flags.handle_usr1; + Options.HandleUsr2 = Flags.handle_usr2; + SetSignalHandler(Options); + + std::atexit(Fuzzer::StaticExitCallback); + + if (Flags.minimize_crash) + return MinimizeCrashInput(Args, Options); + + if (Flags.minimize_crash_internal_step) + return MinimizeCrashInputInternalStep(F, Corpus); + + if (Flags.cleanse_crash) + return CleanseCrashInput(Args, Options); + + if (RunIndividualFiles) { + Options.SaveArtifacts = false; + int Runs = std::max(1, Flags.runs); + Printf("%s: Running %zd inputs %d time(s) each.\n", ProgName->c_str(), + Inputs->size(), Runs); + for (auto &Path : *Inputs) { + auto StartTime = system_clock::now(); + Printf("Running: %s\n", Path.c_str()); + for (int Iter = 0; Iter < Runs; Iter++) + RunOneTest(F, Path.c_str(), Options.MaxLen); + auto StopTime = system_clock::now(); + auto MS = duration_cast<milliseconds>(StopTime - StartTime).count(); + Printf("Executed %s in %zd ms\n", Path.c_str(), (long)MS); + } + Printf("***\n" + "*** NOTE: fuzzing was not performed, you have only\n" + "*** executed the target code on a fixed set of inputs.\n" + "***\n"); + F->PrintFinalStats(); + return 0; + } + + if (Flags.fork) + return FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork); + + if (Flags.merge) + return Merge(F, Options, Args, *Inputs, Flags.merge_control_file); + + if (Flags.merge_inner) { + const size_t kDefaultMaxMergeLen = 1 << 20; + if (Options.MaxLen == 0) + F->SetMaxInputLen(kDefaultMaxMergeLen); + assert(Flags.merge_control_file); + return F->CrashResistantMergeInternalStep(Flags.merge_control_file); + } + + if (Flags.analyze_dict) { + size_t MaxLen = INT_MAX; // Large max length. + UnitVector InitialCorpus; + for (auto &Inp : *Inputs) { + Printf("Loading corpus dir: %s\n", Inp.c_str()); + ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr, + MaxLen, /*ExitOnError=*/false); + } + + if (Dictionary.empty() || Inputs->empty()) { + Printf("ERROR: can't analyze dict without dict and corpus provided\n"); + return 1; + } + if (AnalyzeDictionary(F, Dictionary, InitialCorpus)) { + Printf("Dictionary analysis failed\n"); + return 1; + } + Printf("Dictionary analysis succeeded\n"); + return 0; + } + + { + Vector<std::string> Files; + int Res = ParseSeedInuts(Flags.seed_inputs, Files); + if (Res != 0) + return Res; + auto CorporaFiles = ReadCorpora(*Inputs, Files); + Res = F->Loop(CorporaFiles); + if (Res != 0) + return Res; + if (F->isGracefulExitRequested()) + return 0; + } + + if (Flags.verbosity) + Printf("Done %zd runs in %zd second(s)\n", F->getTotalNumberOfRuns(), + F->secondsSinceProcessStartUp()); + F->PrintFinalStats(); + + return 0; // Don't let F destroy itself. +} + +extern "C" ATTRIBUTE_INTERFACE int +LLVMFuzzerRunDriver(int *argc, char ***argv, + int (*UserCb)(const uint8_t *Data, size_t Size)) { + return FuzzerDriver(argc, argv, UserCb); +} + +// Storage for global ExternalFunctions object. +ExternalFunctions *EF = nullptr; + +} // namespace fuzzer diff --git a/tools/fuzzing/libfuzzer/FuzzerExtFunctions.def b/tools/fuzzing/libfuzzer/FuzzerExtFunctions.def new file mode 100644 index 0000000000..51edf8444e --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerExtFunctions.def @@ -0,0 +1,50 @@ +//===- FuzzerExtFunctions.def - External functions --------------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This defines the external function pointers that +// ``fuzzer::ExternalFunctions`` should contain and try to initialize. The +// EXT_FUNC macro must be defined at the point of inclusion. The signature of +// the macro is: +// +// EXT_FUNC(<name>, <return_type>, <function_signature>, <warn_if_missing>) +//===----------------------------------------------------------------------===// + +// Optional user functions +EXT_FUNC(LLVMFuzzerInitialize, int, (int *argc, char ***argv), false); +EXT_FUNC(LLVMFuzzerCustomMutator, size_t, + (uint8_t *Data, size_t Size, size_t MaxSize, unsigned int Seed), + false); +EXT_FUNC(LLVMFuzzerCustomCrossOver, size_t, + (const uint8_t *Data1, size_t Size1, + const uint8_t *Data2, size_t Size2, + uint8_t *Out, size_t MaxOutSize, unsigned int Seed), + false); + +// Sanitizer functions +EXT_FUNC(__lsan_enable, void, (), false); +EXT_FUNC(__lsan_disable, void, (), false); +EXT_FUNC(__lsan_do_recoverable_leak_check, int, (), false); +EXT_FUNC(__sanitizer_acquire_crash_state, int, (), true); +EXT_FUNC(__sanitizer_install_malloc_and_free_hooks, int, + (void (*malloc_hook)(const volatile void *, size_t), + void (*free_hook)(const volatile void *)), + false); +EXT_FUNC(__sanitizer_log_write, void, (const char *buf, size_t len), false); +EXT_FUNC(__sanitizer_purge_allocator, void, (), false); +EXT_FUNC(__sanitizer_print_memory_profile, void, (size_t, size_t), false); +EXT_FUNC(__sanitizer_print_stack_trace, void, (), true); +EXT_FUNC(__sanitizer_symbolize_pc, void, + (void *, const char *fmt, char *out_buf, size_t out_buf_size), false); +EXT_FUNC(__sanitizer_get_module_and_offset_for_pc, int, + (void *pc, char *module_path, + size_t module_path_len,void **pc_offset), false); +EXT_FUNC(__sanitizer_set_death_callback, void, (void (*)(void)), true); +EXT_FUNC(__sanitizer_set_report_fd, void, (void*), false); +EXT_FUNC(__msan_scoped_disable_interceptor_checks, void, (), false); +EXT_FUNC(__msan_scoped_enable_interceptor_checks, void, (), false); +EXT_FUNC(__msan_unpoison, void, (const volatile void *, size_t size), false); +EXT_FUNC(__msan_unpoison_param, void, (size_t n), false); diff --git a/tools/fuzzing/libfuzzer/FuzzerExtFunctions.h b/tools/fuzzing/libfuzzer/FuzzerExtFunctions.h new file mode 100644 index 0000000000..c88aac4e67 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerExtFunctions.h @@ -0,0 +1,34 @@ +//===- FuzzerExtFunctions.h - Interface to external functions ---*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Defines an interface to (possibly optional) functions. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_EXT_FUNCTIONS_H +#define LLVM_FUZZER_EXT_FUNCTIONS_H + +#include <stddef.h> +#include <stdint.h> + +namespace fuzzer { + +struct ExternalFunctions { + // Initialize function pointers. Functions that are not available will be set + // to nullptr. Do not call this constructor before ``main()`` has been + // entered. + ExternalFunctions(); + +#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ + RETURN_TYPE(*NAME) FUNC_SIG = nullptr + +#include "FuzzerExtFunctions.def" + +#undef EXT_FUNC +}; +} // namespace fuzzer + +#endif diff --git a/tools/fuzzing/libfuzzer/FuzzerExtFunctionsDlsym.cpp b/tools/fuzzing/libfuzzer/FuzzerExtFunctionsDlsym.cpp new file mode 100644 index 0000000000..95233d2a10 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerExtFunctionsDlsym.cpp @@ -0,0 +1,51 @@ +//===- FuzzerExtFunctionsDlsym.cpp - Interface to external functions ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Implementation for operating systems that support dlsym(). We only use it on +// Apple platforms for now. We don't use this approach on Linux because it +// requires that clients of LibFuzzer pass ``--export-dynamic`` to the linker. +// That is a complication we don't wish to expose to clients right now. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" +#if LIBFUZZER_APPLE + +#include "FuzzerExtFunctions.h" +#include "FuzzerIO.h" +#include <dlfcn.h> + +using namespace fuzzer; + +template <typename T> +static T GetFnPtr(const char *FnName, bool WarnIfMissing) { + dlerror(); // Clear any previous errors. + void *Fn = dlsym(RTLD_DEFAULT, FnName); + if (Fn == nullptr) { + if (WarnIfMissing) { + const char *ErrorMsg = dlerror(); + Printf("WARNING: Failed to find function \"%s\".", FnName); + if (ErrorMsg) + Printf(" Reason %s.", ErrorMsg); + Printf("\n"); + } + } + return reinterpret_cast<T>(Fn); +} + +namespace fuzzer { + +ExternalFunctions::ExternalFunctions() { +#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ + this->NAME = GetFnPtr<decltype(ExternalFunctions::NAME)>(#NAME, WARN) + +#include "FuzzerExtFunctions.def" + +#undef EXT_FUNC +} + +} // namespace fuzzer + +#endif // LIBFUZZER_APPLE diff --git a/tools/fuzzing/libfuzzer/FuzzerExtFunctionsWeak.cpp b/tools/fuzzing/libfuzzer/FuzzerExtFunctionsWeak.cpp new file mode 100644 index 0000000000..24ddc57d47 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerExtFunctionsWeak.cpp @@ -0,0 +1,54 @@ +//===- FuzzerExtFunctionsWeak.cpp - Interface to external functions -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Implementation for Linux. This relies on the linker's support for weak +// symbols. We don't use this approach on Apple platforms because it requires +// clients of LibFuzzer to pass ``-U _<symbol_name>`` to the linker to allow +// weak symbols to be undefined. That is a complication we don't want to expose +// to clients right now. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" +#if LIBFUZZER_LINUX || LIBFUZZER_NETBSD || LIBFUZZER_FUCHSIA || \ + LIBFUZZER_FREEBSD || LIBFUZZER_OPENBSD || LIBFUZZER_EMSCRIPTEN + +#include "FuzzerExtFunctions.h" +#include "FuzzerIO.h" + +extern "C" { +// Declare these symbols as weak to allow them to be optionally defined. +#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ + __attribute__((weak, visibility("default"))) RETURN_TYPE NAME FUNC_SIG + +#include "FuzzerExtFunctions.def" + +#undef EXT_FUNC +} + +using namespace fuzzer; + +static void CheckFnPtr(void *FnPtr, const char *FnName, bool WarnIfMissing) { + if (FnPtr == nullptr && WarnIfMissing) { + Printf("WARNING: Failed to find function \"%s\".\n", FnName); + } +} + +namespace fuzzer { + +ExternalFunctions::ExternalFunctions() { +#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ + this->NAME = ::NAME; \ + CheckFnPtr(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(::NAME)), \ + #NAME, WARN); + +#include "FuzzerExtFunctions.def" + +#undef EXT_FUNC +} + +} // namespace fuzzer + +#endif diff --git a/tools/fuzzing/libfuzzer/FuzzerExtFunctionsWindows.cpp b/tools/fuzzing/libfuzzer/FuzzerExtFunctionsWindows.cpp new file mode 100644 index 0000000000..688bad1d51 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerExtFunctionsWindows.cpp @@ -0,0 +1,82 @@ +//=== FuzzerExtWindows.cpp - Interface to external functions --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Implementation of FuzzerExtFunctions for Windows. Uses alternatename when +// compiled with MSVC. Uses weak aliases when compiled with clang. Unfortunately +// the method each compiler supports is not supported by the other. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" +#if LIBFUZZER_WINDOWS + +#include "FuzzerExtFunctions.h" +#include "FuzzerIO.h" + +using namespace fuzzer; + +// Intermediate macro to ensure the parameter is expanded before stringified. +#define STRINGIFY_(A) #A +#define STRINGIFY(A) STRINGIFY_(A) + +#if LIBFUZZER_MSVC +// Copied from compiler-rt/lib/sanitizer_common/sanitizer_win_defs.h +#if defined(_M_IX86) || defined(__i386__) +#define WIN_SYM_PREFIX "_" +#else +#define WIN_SYM_PREFIX +#endif + +// Declare external functions as having alternativenames, so that we can +// determine if they are not defined. +#define EXTERNAL_FUNC(Name, Default) \ + __pragma(comment(linker, "/alternatename:" WIN_SYM_PREFIX STRINGIFY( \ + Name) "=" WIN_SYM_PREFIX STRINGIFY(Default))) +#else +// Declare external functions as weak to allow them to default to a specified +// function if not defined explicitly. We must use weak symbols because clang's +// support for alternatename is not 100%, see +// https://bugs.llvm.org/show_bug.cgi?id=40218 for more details. +#define EXTERNAL_FUNC(Name, Default) \ + __attribute__((weak, alias(STRINGIFY(Default)))) +#endif // LIBFUZZER_MSVC + +extern "C" { +#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ + RETURN_TYPE NAME##Def FUNC_SIG { \ + Printf("ERROR: Function \"%s\" not defined.\n", #NAME); \ + exit(1); \ + } \ + EXTERNAL_FUNC(NAME, NAME##Def) RETURN_TYPE NAME FUNC_SIG + +#include "FuzzerExtFunctions.def" + +#undef EXT_FUNC +} + +template <typename T> +static T *GetFnPtr(T *Fun, T *FunDef, const char *FnName, bool WarnIfMissing) { + if (Fun == FunDef) { + if (WarnIfMissing) + Printf("WARNING: Failed to find function \"%s\".\n", FnName); + return nullptr; + } + return Fun; +} + +namespace fuzzer { + +ExternalFunctions::ExternalFunctions() { +#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ + this->NAME = GetFnPtr<decltype(::NAME)>(::NAME, ::NAME##Def, #NAME, WARN); + +#include "FuzzerExtFunctions.def" + +#undef EXT_FUNC +} + +} // namespace fuzzer + +#endif // LIBFUZZER_WINDOWS diff --git a/tools/fuzzing/libfuzzer/FuzzerExtraCounters.cpp b/tools/fuzzing/libfuzzer/FuzzerExtraCounters.cpp new file mode 100644 index 0000000000..d36beba1b1 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerExtraCounters.cpp @@ -0,0 +1,42 @@ +//===- FuzzerExtraCounters.cpp - Extra coverage counters ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Extra coverage counters defined by user code. +//===----------------------------------------------------------------------===// + +#include "FuzzerPlatform.h" +#include <cstdint> + +#if LIBFUZZER_LINUX || LIBFUZZER_NETBSD || LIBFUZZER_FREEBSD || \ + LIBFUZZER_OPENBSD || LIBFUZZER_FUCHSIA || LIBFUZZER_EMSCRIPTEN +__attribute__((weak)) extern uint8_t __start___libfuzzer_extra_counters; +__attribute__((weak)) extern uint8_t __stop___libfuzzer_extra_counters; + +namespace fuzzer { +uint8_t *ExtraCountersBegin() { return &__start___libfuzzer_extra_counters; } +uint8_t *ExtraCountersEnd() { return &__stop___libfuzzer_extra_counters; } +ATTRIBUTE_NO_SANITIZE_ALL +void ClearExtraCounters() { // hand-written memset, don't asan-ify. + uintptr_t *Beg = reinterpret_cast<uintptr_t*>(ExtraCountersBegin()); + uintptr_t *End = reinterpret_cast<uintptr_t*>(ExtraCountersEnd()); + for (; Beg < End; Beg++) { + *Beg = 0; + __asm__ __volatile__("" : : : "memory"); + } +} + +} // namespace fuzzer + +#else +// TODO: implement for other platforms. +namespace fuzzer { +uint8_t *ExtraCountersBegin() { return nullptr; } +uint8_t *ExtraCountersEnd() { return nullptr; } +void ClearExtraCounters() {} +} // namespace fuzzer + +#endif diff --git a/tools/fuzzing/libfuzzer/FuzzerFlags.def b/tools/fuzzing/libfuzzer/FuzzerFlags.def new file mode 100644 index 0000000000..832224a705 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerFlags.def @@ -0,0 +1,169 @@ +//===- FuzzerFlags.def - Run-time flags -------------------------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Flags. FUZZER_FLAG_INT/FUZZER_FLAG_STRING macros should be defined at the +// point of inclusion. We are not using any flag parsing library for better +// portability and independence. +//===----------------------------------------------------------------------===// +FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") +FUZZER_FLAG_UNSIGNED(seed, 0, "Random seed. If 0, seed is generated.") +FUZZER_FLAG_INT(runs, -1, + "Number of individual test runs (-1 for infinite runs).") +FUZZER_FLAG_INT(max_len, 0, "Maximum length of the test input. " + "If 0, libFuzzer tries to guess a good value based on the corpus " + "and reports it. ") +FUZZER_FLAG_INT(len_control, 100, "Try generating small inputs first, " + "then try larger inputs over time. Specifies the rate at which the length " + "limit is increased (smaller == faster). If 0, immediately try inputs with " + "size up to max_len. Default value is 0, if LLVMFuzzerCustomMutator is used.") +FUZZER_FLAG_STRING(seed_inputs, "A comma-separated list of input files " + "to use as an additional seed corpus. Alternatively, an \"@\" followed by " + "the name of a file containing the comma-separated list.") +FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") +FUZZER_FLAG_INT(mutate_depth, 5, + "Apply this number of consecutive mutations to each input.") +FUZZER_FLAG_INT(reduce_depth, 0, "Experimental/internal. " + "Reduce depth if mutations lose unique features") +FUZZER_FLAG_INT(shuffle, 1, "Shuffle inputs at startup") +FUZZER_FLAG_INT(prefer_small, 1, + "If 1, always prefer smaller inputs during the corpus shuffle.") +FUZZER_FLAG_INT( + timeout, 1200, + "Timeout in seconds (if positive). " + "If one unit runs more than this number of seconds the process will abort.") +FUZZER_FLAG_INT(error_exitcode, 77, "When libFuzzer itself reports a bug " + "this exit code will be used.") +FUZZER_FLAG_INT(timeout_exitcode, 70, "When libFuzzer reports a timeout " + "this exit code will be used.") +FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total " + "time in seconds to run the fuzzer.") +FUZZER_FLAG_INT(help, 0, "Print help.") +FUZZER_FLAG_INT(fork, 0, "Experimental mode where fuzzing happens " + "in a subprocess") +FUZZER_FLAG_INT(ignore_timeouts, 1, "Ignore timeouts in fork mode") +FUZZER_FLAG_INT(ignore_ooms, 1, "Ignore OOMs in fork mode") +FUZZER_FLAG_INT(ignore_crashes, 0, "Ignore crashes in fork mode") +FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be " + "merged into the 1-st corpus. Only interesting units will be taken. " + "This flag can be used to minimize a corpus.") +FUZZER_FLAG_STRING(stop_file, "Stop fuzzing ASAP if this file exists") +FUZZER_FLAG_STRING(merge_inner, "internal flag") +FUZZER_FLAG_STRING(merge_control_file, + "Specify a control file used for the merge process. " + "If a merge process gets killed it tries to leave this file " + "in a state suitable for resuming the merge. " + "By default a temporary file will be used." + "The same file can be used for multistep merge process.") +FUZZER_FLAG_INT(minimize_crash, 0, "If 1, minimizes the provided" + " crash input. Use with -runs=N or -max_total_time=N to limit " + "the number attempts." + " Use with -exact_artifact_path to specify the output." + " Combine with ASAN_OPTIONS=dedup_token_length=3 (or similar) to ensure that" + " the minimized input triggers the same crash." + ) +FUZZER_FLAG_INT(cleanse_crash, 0, "If 1, tries to cleanse the provided" + " crash input to make it contain fewer original bytes." + " Use with -exact_artifact_path to specify the output." + ) +FUZZER_FLAG_INT(minimize_crash_internal_step, 0, "internal flag") +FUZZER_FLAG_STRING(features_dir, "internal flag. Used to dump feature sets on disk." + "Every time a new input is added to the corpus, a corresponding file in the features_dir" + " is created containing the unique features of that input." + " Features are stored in binary format.") +FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters") +FUZZER_FLAG_INT(use_memmem, 1, + "Use hints from intercepting memmem, strstr, etc") +FUZZER_FLAG_INT(use_value_profile, 0, + "Experimental. Use value profile to guide fuzzing.") +FUZZER_FLAG_INT(use_cmp, 1, "Use CMP traces to guide mutations") +FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus inputs.") +FUZZER_FLAG_INT(reduce_inputs, 1, + "Try to reduce the size of inputs while preserving their full feature sets") +FUZZER_FLAG_UNSIGNED(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" + " this number of jobs in separate worker processes" + " with stdout/stderr redirected to fuzz-JOB.log.") +FUZZER_FLAG_UNSIGNED(workers, 0, + "Number of simultaneous worker processes to run the jobs." + " If zero, \"min(jobs,NumberOfCpuCores()/2)\" is used.") +FUZZER_FLAG_INT(reload, 1, + "Reload the main corpus every <N> seconds to get new units" + " discovered by other processes. If 0, disabled") +FUZZER_FLAG_INT(report_slow_units, 10, + "Report slowest units if they run for more than this number of seconds.") +FUZZER_FLAG_INT(only_ascii, 0, + "If 1, generate only ASCII (isprint+isspace) inputs.") +FUZZER_FLAG_STRING(dict, "Experimental. Use the dictionary file.") +FUZZER_FLAG_STRING(artifact_prefix, "Write fuzzing artifacts (crash, " + "timeout, or slow inputs) as " + "$(artifact_prefix)file") +FUZZER_FLAG_STRING(exact_artifact_path, + "Write the single artifact on failure (crash, timeout) " + "as $(exact_artifact_path). This overrides -artifact_prefix " + "and will not use checksum in the file name. Do not " + "use the same path for several parallel processes.") +FUZZER_FLAG_INT(print_pcs, 0, "If 1, print out newly covered PCs.") +FUZZER_FLAG_INT(print_funcs, 2, "If >=1, print out at most this number of " + "newly covered functions.") +FUZZER_FLAG_INT(print_final_stats, 0, "If 1, print statistics at exit.") +FUZZER_FLAG_INT(print_corpus_stats, 0, + "If 1, print statistics on corpus elements at exit.") +FUZZER_FLAG_INT(print_coverage, 0, "If 1, print coverage information as text" + " at exit.") +FUZZER_FLAG_INT(dump_coverage, 0, "Deprecated.") +FUZZER_FLAG_INT(handle_segv, 1, "If 1, try to intercept SIGSEGV.") +FUZZER_FLAG_INT(handle_bus, 1, "If 1, try to intercept SIGBUS.") +FUZZER_FLAG_INT(handle_abrt, 1, "If 1, try to intercept SIGABRT.") +FUZZER_FLAG_INT(handle_ill, 1, "If 1, try to intercept SIGILL.") +FUZZER_FLAG_INT(handle_fpe, 1, "If 1, try to intercept SIGFPE.") +FUZZER_FLAG_INT(handle_int, 1, "If 1, try to intercept SIGINT.") +FUZZER_FLAG_INT(handle_term, 1, "If 1, try to intercept SIGTERM.") +FUZZER_FLAG_INT(handle_xfsz, 1, "If 1, try to intercept SIGXFSZ.") +FUZZER_FLAG_INT(handle_usr1, 1, "If 1, try to intercept SIGUSR1.") +FUZZER_FLAG_INT(handle_usr2, 1, "If 1, try to intercept SIGUSR2.") +FUZZER_FLAG_INT(close_fd_mask, 0, "If 1, close stdout at startup; " + "if 2, close stderr; if 3, close both. " + "Be careful, this will also close e.g. stderr of asan.") +FUZZER_FLAG_INT(detect_leaks, 1, "If 1, and if LeakSanitizer is enabled " + "try to detect memory leaks during fuzzing (i.e. not only at shut down).") +FUZZER_FLAG_INT(purge_allocator_interval, 1, "Purge allocator caches and " + "quarantines every <N> seconds. When rss_limit_mb is specified (>0), " + "purging starts when RSS exceeds 50% of rss_limit_mb. Pass " + "purge_allocator_interval=-1 to disable this functionality.") +FUZZER_FLAG_INT(trace_malloc, 0, "If >= 1 will print all mallocs/frees. " + "If >= 2 will also print stack traces.") +FUZZER_FLAG_INT(rss_limit_mb, 2048, "If non-zero, the fuzzer will exit upon" + "reaching this limit of RSS memory usage.") +FUZZER_FLAG_INT(malloc_limit_mb, 0, "If non-zero, the fuzzer will exit " + "if the target tries to allocate this number of Mb with one malloc call. " + "If zero (default) same limit as rss_limit_mb is applied.") +FUZZER_FLAG_STRING(exit_on_src_pos, "Exit if a newly found PC originates" + " from the given source location. Example: -exit_on_src_pos=foo.cc:123. " + "Used primarily for testing libFuzzer itself.") +FUZZER_FLAG_STRING(exit_on_item, "Exit if an item with a given sha1 sum" + " was added to the corpus. " + "Used primarily for testing libFuzzer itself.") +FUZZER_FLAG_INT(ignore_remaining_args, 0, "If 1, ignore all arguments passed " + "after this one. Useful for fuzzers that need to do their own " + "argument parsing.") +FUZZER_FLAG_STRING(focus_function, "Experimental. " + "Fuzzing will focus on inputs that trigger calls to this function. " + "If -focus_function=auto and -data_flow_trace is used, libFuzzer " + "will choose the focus functions automatically.") +FUZZER_FLAG_INT(entropic, 0, "Experimental. Enables entropic power schedule.") +FUZZER_FLAG_INT(entropic_feature_frequency_threshold, 0xFF, "Experimental. If " + "entropic is enabled, all features which are observed less often than " + "the specified value are considered as rare.") +FUZZER_FLAG_INT(entropic_number_of_rarest_features, 100, "Experimental. If " + "entropic is enabled, we keep track of the frequencies only for the " + "Top-X least abundant features (union features that are considered as " + "rare).") + +FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") +FUZZER_DEPRECATED_FLAG(use_clang_coverage) +FUZZER_FLAG_STRING(data_flow_trace, "Experimental: use the data flow trace") +FUZZER_FLAG_STRING(collect_data_flow, + "Experimental: collect the data flow trace") diff --git a/tools/fuzzing/libfuzzer/FuzzerFork.cpp b/tools/fuzzing/libfuzzer/FuzzerFork.cpp new file mode 100644 index 0000000000..ee2a99a250 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerFork.cpp @@ -0,0 +1,427 @@ +//===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Spawn and orchestrate separate fuzzing processes. +//===----------------------------------------------------------------------===// + +#include "FuzzerCommand.h" +#include "FuzzerFork.h" +#include "FuzzerIO.h" +#include "FuzzerInternal.h" +#include "FuzzerMerge.h" +#include "FuzzerSHA1.h" +#include "FuzzerTracePC.h" +#include "FuzzerUtil.h" + +#include <atomic> +#include <chrono> +#include <condition_variable> +#include <fstream> +#include <memory> +#include <mutex> +#include <queue> +#include <sstream> +#include <thread> + +namespace fuzzer { + +struct Stats { + size_t number_of_executed_units = 0; + size_t peak_rss_mb = 0; + size_t average_exec_per_sec = 0; +}; + +static Stats ParseFinalStatsFromLog(const std::string &LogPath) { + std::ifstream In(LogPath); + std::string Line; + Stats Res; + struct { + const char *Name; + size_t *Var; + } NameVarPairs[] = { + {"stat::number_of_executed_units:", &Res.number_of_executed_units}, + {"stat::peak_rss_mb:", &Res.peak_rss_mb}, + {"stat::average_exec_per_sec:", &Res.average_exec_per_sec}, + {nullptr, nullptr}, + }; + while (std::getline(In, Line, '\n')) { + if (Line.find("stat::") != 0) continue; + std::istringstream ISS(Line); + std::string Name; + size_t Val; + ISS >> Name >> Val; + for (size_t i = 0; NameVarPairs[i].Name; i++) + if (Name == NameVarPairs[i].Name) + *NameVarPairs[i].Var = Val; + } + return Res; +} + +struct FuzzJob { + // Inputs. + Command Cmd; + std::string CorpusDir; + std::string FeaturesDir; + std::string LogPath; + std::string SeedListPath; + std::string CFPath; + size_t JobId; + + int DftTimeInSeconds = 0; + + // Fuzzing Outputs. + int ExitCode; + + ~FuzzJob() { + RemoveFile(CFPath); + RemoveFile(LogPath); + RemoveFile(SeedListPath); + RmDirRecursive(CorpusDir); + RmDirRecursive(FeaturesDir); + } +}; + +struct GlobalEnv { + Vector<std::string> Args; + Vector<std::string> CorpusDirs; + std::string MainCorpusDir; + std::string TempDir; + std::string DFTDir; + std::string DataFlowBinary; + Set<uint32_t> Features, Cov; + Set<std::string> FilesWithDFT; + Vector<std::string> Files; + Random *Rand; + std::chrono::system_clock::time_point ProcessStartTime; + int Verbosity = 0; + + size_t NumTimeouts = 0; + size_t NumOOMs = 0; + size_t NumCrashes = 0; + + + size_t NumRuns = 0; + + std::string StopFile() { return DirPlusFile(TempDir, "STOP"); } + + size_t secondsSinceProcessStartUp() const { + return std::chrono::duration_cast<std::chrono::seconds>( + std::chrono::system_clock::now() - ProcessStartTime) + .count(); + } + + FuzzJob *CreateNewJob(size_t JobId) { + Command Cmd(Args); + Cmd.removeFlag("fork"); + Cmd.removeFlag("runs"); + Cmd.removeFlag("collect_data_flow"); + for (auto &C : CorpusDirs) // Remove all corpora from the args. + Cmd.removeArgument(C); + Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload. + Cmd.addFlag("print_final_stats", "1"); + Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing. + Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId))); + Cmd.addFlag("stop_file", StopFile()); + if (!DataFlowBinary.empty()) { + Cmd.addFlag("data_flow_trace", DFTDir); + if (!Cmd.hasFlag("focus_function")) + Cmd.addFlag("focus_function", "auto"); + } + auto Job = new FuzzJob; + std::string Seeds; + if (size_t CorpusSubsetSize = + std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) { + auto Time1 = std::chrono::system_clock::now(); + for (size_t i = 0; i < CorpusSubsetSize; i++) { + auto &SF = Files[Rand->SkewTowardsLast(Files.size())]; + Seeds += (Seeds.empty() ? "" : ",") + SF; + CollectDFT(SF); + } + auto Time2 = std::chrono::system_clock::now(); + Job->DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count(); + } + if (!Seeds.empty()) { + Job->SeedListPath = + DirPlusFile(TempDir, std::to_string(JobId) + ".seeds"); + WriteToFile(Seeds, Job->SeedListPath); + Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath); + } + Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log"); + Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId)); + Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId)); + Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge"); + Job->JobId = JobId; + + + Cmd.addArgument(Job->CorpusDir); + Cmd.addFlag("features_dir", Job->FeaturesDir); + + for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) { + RmDirRecursive(D); + MkDir(D); + } + + Cmd.setOutputFile(Job->LogPath); + Cmd.combineOutAndErr(); + + Job->Cmd = Cmd; + + if (Verbosity >= 2) + Printf("Job %zd/%p Created: %s\n", JobId, Job, + Job->Cmd.toString().c_str()); + // Start from very short runs and gradually increase them. + return Job; + } + + int RunOneMergeJob(FuzzJob *Job) { + auto Stats = ParseFinalStatsFromLog(Job->LogPath); + NumRuns += Stats.number_of_executed_units; + + Vector<SizedFile> TempFiles, MergeCandidates; + // Read all newly created inputs and their feature sets. + // Choose only those inputs that have new features. + int Res = GetSizedFilesFromDir(Job->CorpusDir, &TempFiles); + if (Res != 0) + return Res; + std::sort(TempFiles.begin(), TempFiles.end()); + for (auto &F : TempFiles) { + auto FeatureFile = F.File; + FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir); + auto FeatureBytes = FileToVector(FeatureFile, 0, false); + assert((FeatureBytes.size() % sizeof(uint32_t)) == 0); + Vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t)); + memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size()); + for (auto Ft : NewFeatures) { + if (!Features.count(Ft)) { + MergeCandidates.push_back(F); + break; + } + } + } + // if (!FilesToAdd.empty() || Job->ExitCode != 0) + Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s %zd " + "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n", + NumRuns, Cov.size(), Features.size(), Files.size(), + Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes, + secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds); + + if (MergeCandidates.empty()) return 0; + + Vector<std::string> FilesToAdd; + Set<uint32_t> NewFeatures, NewCov; + CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features, + &NewFeatures, Cov, &NewCov, Job->CFPath, false); + if (Fuzzer::isGracefulExitRequested()) + return 0; + for (auto &Path : FilesToAdd) { + auto U = FileToVector(Path); + auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); + WriteToFile(U, NewPath); + Files.push_back(NewPath); + } + Features.insert(NewFeatures.begin(), NewFeatures.end()); + Cov.insert(NewCov.begin(), NewCov.end()); + for (auto Idx : NewCov) + if (auto *TE = TPC.PCTableEntryByIdx(Idx)) + if (TPC.PcIsFuncEntry(TE)) + PrintPC(" NEW_FUNC: %p %F %L\n", "", + TPC.GetNextInstructionPc(TE->PC)); + return 0; + } + + + void CollectDFT(const std::string &InputPath) { + if (DataFlowBinary.empty()) return; + if (!FilesWithDFT.insert(InputPath).second) return; + Command Cmd(Args); + Cmd.removeFlag("fork"); + Cmd.removeFlag("runs"); + Cmd.addFlag("data_flow_trace", DFTDir); + Cmd.addArgument(InputPath); + for (auto &C : CorpusDirs) // Remove all corpora from the args. + Cmd.removeArgument(C); + Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log")); + Cmd.combineOutAndErr(); + // Printf("CollectDFT: %s\n", Cmd.toString().c_str()); + ExecuteCommand(Cmd); + } + +}; + +struct JobQueue { + std::queue<FuzzJob *> Qu; + std::mutex Mu; + std::condition_variable Cv; + + void Push(FuzzJob *Job) { + { + std::lock_guard<std::mutex> Lock(Mu); + Qu.push(Job); + } + Cv.notify_one(); + } + FuzzJob *Pop() { + std::unique_lock<std::mutex> Lk(Mu); + // std::lock_guard<std::mutex> Lock(Mu); + Cv.wait(Lk, [&]{return !Qu.empty();}); + assert(!Qu.empty()); + auto Job = Qu.front(); + Qu.pop(); + return Job; + } +}; + +void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) { + while (auto Job = FuzzQ->Pop()) { + // Printf("WorkerThread: job %p\n", Job); + Job->ExitCode = ExecuteCommand(Job->Cmd); + MergeQ->Push(Job); + } +} + +// This is just a skeleton of an experimental -fork=1 feature. +int FuzzWithFork(Random &Rand, const FuzzingOptions &Options, + const Vector<std::string> &Args, + const Vector<std::string> &CorpusDirs, int NumJobs) { + Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs); + + GlobalEnv Env; + Env.Args = Args; + Env.CorpusDirs = CorpusDirs; + Env.Rand = &Rand; + Env.Verbosity = Options.Verbosity; + Env.ProcessStartTime = std::chrono::system_clock::now(); + Env.DataFlowBinary = Options.CollectDataFlow; + + Vector<SizedFile> SeedFiles; + int Res; + for (auto &Dir : CorpusDirs) { + Res = GetSizedFilesFromDir(Dir, &SeedFiles); + if (Res != 0) + return Res; + } + std::sort(SeedFiles.begin(), SeedFiles.end()); + Env.TempDir = TempPath("FuzzWithFork", ".dir"); + Env.DFTDir = DirPlusFile(Env.TempDir, "DFT"); + RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs. + MkDir(Env.TempDir); + MkDir(Env.DFTDir); + + + if (CorpusDirs.empty()) + MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C")); + else + Env.MainCorpusDir = CorpusDirs[0]; + + auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); + Res = CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, + {}, &Env.Cov, + CFPath, false); + if (Res != 0) + return Res; + if (Fuzzer::isGracefulExitRequested()) + return 0; + + RemoveFile(CFPath); + Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, + Env.Files.size(), Env.TempDir.c_str()); + + int ExitCode = 0; + + JobQueue FuzzQ, MergeQ; + + auto StopJobs = [&]() { + for (int i = 0; i < NumJobs; i++) + FuzzQ.Push(nullptr); + MergeQ.Push(nullptr); + WriteToFile(Unit({1}), Env.StopFile()); + }; + + size_t JobId = 1; + Vector<std::thread> Threads; + for (int t = 0; t < NumJobs; t++) { + Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ)); + FuzzQ.Push(Env.CreateNewJob(JobId++)); + } + + while (true) { + std::unique_ptr<FuzzJob> Job(MergeQ.Pop()); + if (!Job) + break; + ExitCode = Job->ExitCode; + if (ExitCode == Options.InterruptExitCode) { + Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid()); + StopJobs(); + break; + } + if (Fuzzer::MaybeExitGracefully()) + return 0; + + Res = Env.RunOneMergeJob(Job.get()); + if (Res != 0) + return Res; + if (Fuzzer::isGracefulExitRequested()) + return 0; + + // Continue if our crash is one of the ignorred ones. + if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode) + Env.NumTimeouts++; + else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode) + Env.NumOOMs++; + else if (ExitCode != 0) { + Env.NumCrashes++; + if (Options.IgnoreCrashes) { + std::ifstream In(Job->LogPath); + std::string Line; + while (std::getline(In, Line, '\n')) + if (Line.find("ERROR:") != Line.npos || + Line.find("runtime error:") != Line.npos) + Printf("%s\n", Line.c_str()); + } else { + // And exit if we don't ignore this crash. + Printf("INFO: log from the inner process:\n%s", + FileToString(Job->LogPath).c_str()); + StopJobs(); + break; + } + } + + // Stop if we are over the time budget. + // This is not precise, since other threads are still running + // and we will wait while joining them. + // We also don't stop instantly: other jobs need to finish. + if (Options.MaxTotalTimeSec > 0 && + Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) { + Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n", + Env.secondsSinceProcessStartUp()); + StopJobs(); + break; + } + if (Env.NumRuns >= Options.MaxNumberOfRuns) { + Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n", + Env.NumRuns); + StopJobs(); + break; + } + + FuzzQ.Push(Env.CreateNewJob(JobId++)); + } + + for (auto &T : Threads) + T.join(); + + // The workers have terminated. Don't try to remove the directory before they + // terminate to avoid a race condition preventing cleanup on Windows. + RmDirRecursive(Env.TempDir); + + // Use the exit code from the last child process. + Printf("INFO: exiting: %d time: %zds\n", ExitCode, + Env.secondsSinceProcessStartUp()); + return ExitCode; +} + +} // namespace fuzzer diff --git a/tools/fuzzing/libfuzzer/FuzzerFork.h b/tools/fuzzing/libfuzzer/FuzzerFork.h new file mode 100644 index 0000000000..1352171ad4 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerFork.h @@ -0,0 +1,24 @@ +//===- FuzzerFork.h - run fuzzing in sub-processes --------------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_FORK_H +#define LLVM_FUZZER_FORK_H + +#include "FuzzerDefs.h" +#include "FuzzerOptions.h" +#include "FuzzerRandom.h" + +#include <string> + +namespace fuzzer { +int FuzzWithFork(Random &Rand, const FuzzingOptions &Options, + const Vector<std::string> &Args, + const Vector<std::string> &CorpusDirs, int NumJobs); +} // namespace fuzzer + +#endif // LLVM_FUZZER_FORK_H diff --git a/tools/fuzzing/libfuzzer/FuzzerIO.cpp b/tools/fuzzing/libfuzzer/FuzzerIO.cpp new file mode 100644 index 0000000000..6be2be67c6 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerIO.cpp @@ -0,0 +1,165 @@ +//===- FuzzerIO.cpp - IO utils. -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// IO functions. +//===----------------------------------------------------------------------===// + +#include "mozilla/Unused.h" +#include "FuzzerDefs.h" +#include "FuzzerExtFunctions.h" +#include "FuzzerIO.h" +#include "FuzzerUtil.h" +#include <algorithm> +#include <cstdarg> +#include <fstream> +#include <iterator> +#include <sys/stat.h> +#include <sys/types.h> + +namespace fuzzer { + +static FILE *OutputFile = stderr; + +long GetEpoch(const std::string &Path) { + struct stat St; + if (stat(Path.c_str(), &St)) + return 0; // Can't stat, be conservative. + return St.st_mtime; +} + +Unit FileToVector(const std::string &Path, size_t MaxSize, bool ExitOnError) { + std::ifstream T(Path, std::ios::binary); + if (ExitOnError && !T) { + Printf("No such directory: %s; exiting\n", Path.c_str()); + exit(1); + } + + T.seekg(0, T.end); + auto EndPos = T.tellg(); + if (EndPos < 0) return {}; + size_t FileLen = EndPos; + if (MaxSize) + FileLen = std::min(FileLen, MaxSize); + + T.seekg(0, T.beg); + Unit Res(FileLen); + T.read(reinterpret_cast<char *>(Res.data()), FileLen); + return Res; +} + +std::string FileToString(const std::string &Path) { + std::ifstream T(Path, std::ios::binary); + return std::string((std::istreambuf_iterator<char>(T)), + std::istreambuf_iterator<char>()); +} + +void CopyFileToErr(const std::string &Path) { + Printf("%s", FileToString(Path).c_str()); +} + +void WriteToFile(const Unit &U, const std::string &Path) { + WriteToFile(U.data(), U.size(), Path); +} + +void WriteToFile(const std::string &Data, const std::string &Path) { + WriteToFile(reinterpret_cast<const uint8_t *>(Data.c_str()), Data.size(), + Path); +} + +void WriteToFile(const uint8_t *Data, size_t Size, const std::string &Path) { + // Use raw C interface because this function may be called from a sig handler. + FILE *Out = fopen(Path.c_str(), "wb"); + if (!Out) return; + mozilla::Unused << fwrite(Data, sizeof(Data[0]), Size, Out); + fclose(Out); +} + +void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V, + long *Epoch, size_t MaxSize, bool ExitOnError) { + long E = Epoch ? *Epoch : 0; + Vector<std::string> Files; + int Res = ListFilesInDirRecursive(Path, Epoch, &Files, /*TopDir*/true); + if (ExitOnError && Res != 0) + exit(Res); + size_t NumLoaded = 0; + for (size_t i = 0; i < Files.size(); i++) { + auto &X = Files[i]; + if (Epoch && GetEpoch(X) < E) continue; + NumLoaded++; + if ((NumLoaded & (NumLoaded - 1)) == 0 && NumLoaded >= 1024) + Printf("Loaded %zd/%zd files from %s\n", NumLoaded, Files.size(), Path); + auto S = FileToVector(X, MaxSize, ExitOnError); + if (!S.empty()) + V->push_back(S); + } +} + + +int GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V) { + Vector<std::string> Files; + int Res = ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true); + if (Res != 0) + return Res; + for (auto &File : Files) + if (size_t Size = FileSize(File)) + V->push_back({File, Size}); + return 0; +} + +std::string DirPlusFile(const std::string &DirPath, + const std::string &FileName) { + return DirPath + GetSeparator() + FileName; +} + +void DupAndCloseStderr() { + int OutputFd = DuplicateFile(2); + if (OutputFd >= 0) { + FILE *NewOutputFile = OpenFile(OutputFd, "w"); + if (NewOutputFile) { + OutputFile = NewOutputFile; + if (EF->__sanitizer_set_report_fd) + EF->__sanitizer_set_report_fd( + reinterpret_cast<void *>(GetHandleFromFd(OutputFd))); + DiscardOutput(2); + } + } +} + +void CloseStdout() { + DiscardOutput(1); +} + +void Printf(const char *Fmt, ...) { + va_list ap; + va_start(ap, Fmt); + vfprintf(OutputFile, Fmt, ap); + va_end(ap); + fflush(OutputFile); +} + +void VPrintf(bool Verbose, const char *Fmt, ...) { + if (!Verbose) return; + va_list ap; + va_start(ap, Fmt); + vfprintf(OutputFile, Fmt, ap); + va_end(ap); + fflush(OutputFile); +} + +void RmDirRecursive(const std::string &Dir) { + IterateDirRecursive( + Dir, [](const std::string &Path) {}, + [](const std::string &Path) { RmDir(Path); }, + [](const std::string &Path) { RemoveFile(Path); }); +} + +std::string TempPath(const char *Prefix, const char *Extension) { + return DirPlusFile(TmpDir(), std::string("libFuzzerTemp.") + Prefix + + std::to_string(GetPid()) + Extension); +} + +} // namespace fuzzer diff --git a/tools/fuzzing/libfuzzer/FuzzerIO.h b/tools/fuzzing/libfuzzer/FuzzerIO.h new file mode 100644 index 0000000000..6c90ba6373 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerIO.h @@ -0,0 +1,106 @@ +//===- FuzzerIO.h - Internal header for IO utils ----------------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// IO interface. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_IO_H +#define LLVM_FUZZER_IO_H + +#include "FuzzerDefs.h" + +namespace fuzzer { + +long GetEpoch(const std::string &Path); + +Unit FileToVector(const std::string &Path, size_t MaxSize = 0, + bool ExitOnError = true); + +std::string FileToString(const std::string &Path); + +void CopyFileToErr(const std::string &Path); + +void WriteToFile(const uint8_t *Data, size_t Size, const std::string &Path); +// Write Data.c_str() to the file without terminating null character. +void WriteToFile(const std::string &Data, const std::string &Path); +void WriteToFile(const Unit &U, const std::string &Path); + +void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V, + long *Epoch, size_t MaxSize, bool ExitOnError); + +// Returns "Dir/FileName" or equivalent for the current OS. +std::string DirPlusFile(const std::string &DirPath, + const std::string &FileName); + +// Returns the name of the dir, similar to the 'dirname' utility. +std::string DirName(const std::string &FileName); + +// Returns path to a TmpDir. +std::string TmpDir(); + +std::string TempPath(const char *Prefix, const char *Extension); + +bool IsInterestingCoverageFile(const std::string &FileName); + +void DupAndCloseStderr(); + +void CloseStdout(); + +void Printf(const char *Fmt, ...); +void VPrintf(bool Verbose, const char *Fmt, ...); + +// Print using raw syscalls, useful when printing at early init stages. +void RawPrint(const char *Str); + +// Platform specific functions: +bool IsFile(const std::string &Path); +size_t FileSize(const std::string &Path); + +int ListFilesInDirRecursive(const std::string &Dir, long *Epoch, + Vector<std::string> *V, bool TopDir); + +void RmDirRecursive(const std::string &Dir); + +// Iterate files and dirs inside Dir, recursively. +// Call DirPreCallback/DirPostCallback on dirs before/after +// calling FileCallback on files. +void IterateDirRecursive(const std::string &Dir, + void (*DirPreCallback)(const std::string &Dir), + void (*DirPostCallback)(const std::string &Dir), + void (*FileCallback)(const std::string &Dir)); + +struct SizedFile { + std::string File; + size_t Size; + bool operator<(const SizedFile &B) const { return Size < B.Size; } +}; + +int GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V); + +char GetSeparator(); +// Similar to the basename utility: returns the file name w/o the dir prefix. +std::string Basename(const std::string &Path); + +FILE* OpenFile(int Fd, const char *Mode); + +int CloseFile(int Fd); + +int DuplicateFile(int Fd); + +void RemoveFile(const std::string &Path); +void RenameFile(const std::string &OldPath, const std::string &NewPath); + +intptr_t GetHandleFromFd(int fd); + +void MkDir(const std::string &Path); +void RmDir(const std::string &Path); + +const std::string &getDevNull(); + +} // namespace fuzzer + +#endif // LLVM_FUZZER_IO_H diff --git a/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp b/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp new file mode 100644 index 0000000000..1a50295c01 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp @@ -0,0 +1,181 @@ +//===- FuzzerIOPosix.cpp - IO utils for Posix. ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// IO functions implementation using Posix API. +//===----------------------------------------------------------------------===// +#include "mozilla/Unused.h" +#include "FuzzerPlatform.h" +#if LIBFUZZER_POSIX || LIBFUZZER_FUCHSIA + +#include "FuzzerExtFunctions.h" +#include "FuzzerIO.h" +#include <cstdarg> +#include <cstdio> +#include <dirent.h> +#include <fstream> +#include <iterator> +#include <libgen.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +namespace fuzzer { + +bool IsFile(const std::string &Path) { + struct stat St; + if (stat(Path.c_str(), &St)) + return false; + return S_ISREG(St.st_mode); +} + +static bool IsDirectory(const std::string &Path) { + struct stat St; + if (stat(Path.c_str(), &St)) + return false; + return S_ISDIR(St.st_mode); +} + +size_t FileSize(const std::string &Path) { + struct stat St; + if (stat(Path.c_str(), &St)) + return 0; + return St.st_size; +} + +std::string Basename(const std::string &Path) { + size_t Pos = Path.rfind(GetSeparator()); + if (Pos == std::string::npos) return Path; + assert(Pos < Path.size()); + return Path.substr(Pos + 1); +} + +int ListFilesInDirRecursive(const std::string &Dir, long *Epoch, + Vector<std::string> *V, bool TopDir) { + auto E = GetEpoch(Dir); + if (Epoch) + if (E && *Epoch >= E) return 0; + + DIR *D = opendir(Dir.c_str()); + if (!D) { + Printf("%s: %s; exiting\n", strerror(errno), Dir.c_str()); + return 1; + } + while (auto E = readdir(D)) { + std::string Path = DirPlusFile(Dir, E->d_name); + if (E->d_type == DT_REG || E->d_type == DT_LNK || + (E->d_type == DT_UNKNOWN && IsFile(Path))) + V->push_back(Path); + else if ((E->d_type == DT_DIR || + (E->d_type == DT_UNKNOWN && IsDirectory(Path))) && + *E->d_name != '.') { + int Res = ListFilesInDirRecursive(Path, Epoch, V, false); + if (Res != 0) + return Res; + } + } + closedir(D); + if (Epoch && TopDir) + *Epoch = E; + return 0; +} + + +void IterateDirRecursive(const std::string &Dir, + void (*DirPreCallback)(const std::string &Dir), + void (*DirPostCallback)(const std::string &Dir), + void (*FileCallback)(const std::string &Dir)) { + DirPreCallback(Dir); + DIR *D = opendir(Dir.c_str()); + if (!D) return; + while (auto E = readdir(D)) { + std::string Path = DirPlusFile(Dir, E->d_name); + if (E->d_type == DT_REG || E->d_type == DT_LNK || + (E->d_type == DT_UNKNOWN && IsFile(Path))) + FileCallback(Path); + else if ((E->d_type == DT_DIR || + (E->d_type == DT_UNKNOWN && IsDirectory(Path))) && + *E->d_name != '.') + IterateDirRecursive(Path, DirPreCallback, DirPostCallback, FileCallback); + } + closedir(D); + DirPostCallback(Dir); +} + +char GetSeparator() { + return '/'; +} + +FILE* OpenFile(int Fd, const char* Mode) { + return fdopen(Fd, Mode); +} + +int CloseFile(int fd) { + return close(fd); +} + +int DuplicateFile(int Fd) { + return dup(Fd); +} + +void RemoveFile(const std::string &Path) { + unlink(Path.c_str()); +} + +void RenameFile(const std::string &OldPath, const std::string &NewPath) { + rename(OldPath.c_str(), NewPath.c_str()); +} + +intptr_t GetHandleFromFd(int fd) { + return static_cast<intptr_t>(fd); +} + +std::string DirName(const std::string &FileName) { + char *Tmp = new char[FileName.size() + 1]; + memcpy(Tmp, FileName.c_str(), FileName.size() + 1); + std::string Res = dirname(Tmp); + delete [] Tmp; + return Res; +} + +std::string TmpDir() { + if (auto Env = getenv("TMPDIR")) + return Env; + return "/tmp"; +} + +bool IsInterestingCoverageFile(const std::string &FileName) { + if (FileName.find("compiler-rt/lib/") != std::string::npos) + return false; // sanitizer internal. + if (FileName.find("/usr/lib/") != std::string::npos) + return false; + if (FileName.find("/usr/include/") != std::string::npos) + return false; + if (FileName == "<null>") + return false; + return true; +} + +void RawPrint(const char *Str) { + mozilla::Unused << write(2, Str, strlen(Str)); +} + +void MkDir(const std::string &Path) { + mkdir(Path.c_str(), 0700); +} + +void RmDir(const std::string &Path) { + rmdir(Path.c_str()); +} + +const std::string &getDevNull() { + static const std::string devNull = "/dev/null"; + return devNull; +} + +} // namespace fuzzer + +#endif // LIBFUZZER_POSIX diff --git a/tools/fuzzing/libfuzzer/FuzzerIOWindows.cpp b/tools/fuzzing/libfuzzer/FuzzerIOWindows.cpp new file mode 100644 index 0000000000..0e977bd025 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerIOWindows.cpp @@ -0,0 +1,417 @@ +//===- FuzzerIOWindows.cpp - IO utils for Windows. ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// IO functions implementation for Windows. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" +#if LIBFUZZER_WINDOWS + +#include "FuzzerExtFunctions.h" +#include "FuzzerIO.h" +#include <cstdarg> +#include <cstdio> +#include <fstream> +#include <io.h> +#include <iterator> +#include <sys/stat.h> +#include <sys/types.h> +#include <windows.h> + +namespace fuzzer { + +static bool IsFile(const std::string &Path, const DWORD &FileAttributes) { + + if (FileAttributes & FILE_ATTRIBUTE_NORMAL) + return true; + + if (FileAttributes & FILE_ATTRIBUTE_DIRECTORY) + return false; + + HANDLE FileHandle( + CreateFileA(Path.c_str(), 0, FILE_SHARE_READ, NULL, OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, 0)); + + if (FileHandle == INVALID_HANDLE_VALUE) { + Printf("CreateFileA() failed for \"%s\" (Error code: %lu).\n", Path.c_str(), + GetLastError()); + return false; + } + + DWORD FileType = GetFileType(FileHandle); + + if (FileType == FILE_TYPE_UNKNOWN) { + Printf("GetFileType() failed for \"%s\" (Error code: %lu).\n", Path.c_str(), + GetLastError()); + CloseHandle(FileHandle); + return false; + } + + if (FileType != FILE_TYPE_DISK) { + CloseHandle(FileHandle); + return false; + } + + CloseHandle(FileHandle); + return true; +} + +bool IsFile(const std::string &Path) { + DWORD Att = GetFileAttributesA(Path.c_str()); + + if (Att == INVALID_FILE_ATTRIBUTES) { + Printf("GetFileAttributesA() failed for \"%s\" (Error code: %lu).\n", + Path.c_str(), GetLastError()); + return false; + } + + return IsFile(Path, Att); +} + +static bool IsDir(DWORD FileAttrs) { + if (FileAttrs == INVALID_FILE_ATTRIBUTES) return false; + return FileAttrs & FILE_ATTRIBUTE_DIRECTORY; +} + +std::string Basename(const std::string &Path) { + size_t Pos = Path.find_last_of("/\\"); + if (Pos == std::string::npos) return Path; + assert(Pos < Path.size()); + return Path.substr(Pos + 1); +} + +size_t FileSize(const std::string &Path) { + WIN32_FILE_ATTRIBUTE_DATA attr; + if (!GetFileAttributesExA(Path.c_str(), GetFileExInfoStandard, &attr)) { + DWORD LastError = GetLastError(); + if (LastError != ERROR_FILE_NOT_FOUND) + Printf("GetFileAttributesExA() failed for \"%s\" (Error code: %lu).\n", + Path.c_str(), LastError); + return 0; + } + ULARGE_INTEGER size; + size.HighPart = attr.nFileSizeHigh; + size.LowPart = attr.nFileSizeLow; + return size.QuadPart; +} + +int ListFilesInDirRecursive(const std::string &Dir, long *Epoch, + Vector<std::string> *V, bool TopDir) { + int Res; + auto E = GetEpoch(Dir); + if (Epoch) + if (E && *Epoch >= E) return 0; + + std::string Path(Dir); + assert(!Path.empty()); + if (Path.back() != '\\') + Path.push_back('\\'); + Path.push_back('*'); + + // Get the first directory entry. + WIN32_FIND_DATAA FindInfo; + HANDLE FindHandle(FindFirstFileA(Path.c_str(), &FindInfo)); + if (FindHandle == INVALID_HANDLE_VALUE) + { + if (GetLastError() == ERROR_FILE_NOT_FOUND) + return 0; + Printf("No such file or directory: %s; exiting\n", Dir.c_str()); + return 1; + } + + do { + std::string FileName = DirPlusFile(Dir, FindInfo.cFileName); + + if (FindInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + size_t FilenameLen = strlen(FindInfo.cFileName); + if ((FilenameLen == 1 && FindInfo.cFileName[0] == '.') || + (FilenameLen == 2 && FindInfo.cFileName[0] == '.' && + FindInfo.cFileName[1] == '.')) + continue; + + int Res = ListFilesInDirRecursive(FileName, Epoch, V, false); + if (Res != 0) + return Res; + } + else if (IsFile(FileName, FindInfo.dwFileAttributes)) + V->push_back(FileName); + } while (FindNextFileA(FindHandle, &FindInfo)); + + DWORD LastError = GetLastError(); + if (LastError != ERROR_NO_MORE_FILES) + Printf("FindNextFileA failed (Error code: %lu).\n", LastError); + + FindClose(FindHandle); + + if (Epoch && TopDir) + *Epoch = E; + return 0; +} + + +void IterateDirRecursive(const std::string &Dir, + void (*DirPreCallback)(const std::string &Dir), + void (*DirPostCallback)(const std::string &Dir), + void (*FileCallback)(const std::string &Dir)) { + // TODO(metzman): Implement ListFilesInDirRecursive via this function. + DirPreCallback(Dir); + + DWORD DirAttrs = GetFileAttributesA(Dir.c_str()); + if (!IsDir(DirAttrs)) return; + + std::string TargetDir(Dir); + assert(!TargetDir.empty()); + if (TargetDir.back() != '\\') TargetDir.push_back('\\'); + TargetDir.push_back('*'); + + WIN32_FIND_DATAA FindInfo; + // Find the directory's first file. + HANDLE FindHandle = FindFirstFileA(TargetDir.c_str(), &FindInfo); + if (FindHandle == INVALID_HANDLE_VALUE) { + DWORD LastError = GetLastError(); + if (LastError != ERROR_FILE_NOT_FOUND) { + // If the directory isn't empty, then something abnormal is going on. + Printf("FindFirstFileA failed for %s (Error code: %lu).\n", Dir.c_str(), + LastError); + } + return; + } + + do { + std::string Path = DirPlusFile(Dir, FindInfo.cFileName); + DWORD PathAttrs = FindInfo.dwFileAttributes; + if (IsDir(PathAttrs)) { + // Is Path the current directory (".") or the parent ("..")? + if (strcmp(FindInfo.cFileName, ".") == 0 || + strcmp(FindInfo.cFileName, "..") == 0) + continue; + IterateDirRecursive(Path, DirPreCallback, DirPostCallback, FileCallback); + } else if (PathAttrs != INVALID_FILE_ATTRIBUTES) { + FileCallback(Path); + } + } while (FindNextFileA(FindHandle, &FindInfo)); + + DWORD LastError = GetLastError(); + if (LastError != ERROR_NO_MORE_FILES) + Printf("FindNextFileA failed for %s (Error code: %lu).\n", Dir.c_str(), + LastError); + + FindClose(FindHandle); + DirPostCallback(Dir); +} + +char GetSeparator() { + return '\\'; +} + +FILE* OpenFile(int Fd, const char* Mode) { + return _fdopen(Fd, Mode); +} + +int CloseFile(int Fd) { + return _close(Fd); +} + +int DuplicateFile(int Fd) { + return _dup(Fd); +} + +void RemoveFile(const std::string &Path) { + _unlink(Path.c_str()); +} + +void RenameFile(const std::string &OldPath, const std::string &NewPath) { + rename(OldPath.c_str(), NewPath.c_str()); +} + +intptr_t GetHandleFromFd(int fd) { + return _get_osfhandle(fd); +} + +static bool IsSeparator(char C) { + return C == '\\' || C == '/'; +} + +// Parse disk designators, like "C:\". If Relative == true, also accepts: "C:". +// Returns number of characters considered if successful. +static size_t ParseDrive(const std::string &FileName, const size_t Offset, + bool Relative = true) { + if (Offset + 1 >= FileName.size() || FileName[Offset + 1] != ':') + return 0; + if (Offset + 2 >= FileName.size() || !IsSeparator(FileName[Offset + 2])) { + if (!Relative) // Accept relative path? + return 0; + else + return 2; + } + return 3; +} + +// Parse a file name, like: SomeFile.txt +// Returns number of characters considered if successful. +static size_t ParseFileName(const std::string &FileName, const size_t Offset) { + size_t Pos = Offset; + const size_t End = FileName.size(); + for(; Pos < End && !IsSeparator(FileName[Pos]); ++Pos) + ; + return Pos - Offset; +} + +// Parse a directory ending in separator, like: `SomeDir\` +// Returns number of characters considered if successful. +static size_t ParseDir(const std::string &FileName, const size_t Offset) { + size_t Pos = Offset; + const size_t End = FileName.size(); + if (Pos >= End || IsSeparator(FileName[Pos])) + return 0; + for(; Pos < End && !IsSeparator(FileName[Pos]); ++Pos) + ; + if (Pos >= End) + return 0; + ++Pos; // Include separator. + return Pos - Offset; +} + +// Parse a servername and share, like: `SomeServer\SomeShare\` +// Returns number of characters considered if successful. +static size_t ParseServerAndShare(const std::string &FileName, + const size_t Offset) { + size_t Pos = Offset, Res; + if (!(Res = ParseDir(FileName, Pos))) + return 0; + Pos += Res; + if (!(Res = ParseDir(FileName, Pos))) + return 0; + Pos += Res; + return Pos - Offset; +} + +// Parse the given Ref string from the position Offset, to exactly match the given +// string Patt. +// Returns number of characters considered if successful. +static size_t ParseCustomString(const std::string &Ref, size_t Offset, + const char *Patt) { + size_t Len = strlen(Patt); + if (Offset + Len > Ref.size()) + return 0; + return Ref.compare(Offset, Len, Patt) == 0 ? Len : 0; +} + +// Parse a location, like: +// \\?\UNC\Server\Share\ \\?\C:\ \\Server\Share\ \ C:\ C: +// Returns number of characters considered if successful. +static size_t ParseLocation(const std::string &FileName) { + size_t Pos = 0, Res; + + if ((Res = ParseCustomString(FileName, Pos, R"(\\?\)"))) { + Pos += Res; + if ((Res = ParseCustomString(FileName, Pos, R"(UNC\)"))) { + Pos += Res; + if ((Res = ParseServerAndShare(FileName, Pos))) + return Pos + Res; + return 0; + } + if ((Res = ParseDrive(FileName, Pos, false))) + return Pos + Res; + return 0; + } + + if (Pos < FileName.size() && IsSeparator(FileName[Pos])) { + ++Pos; + if (Pos < FileName.size() && IsSeparator(FileName[Pos])) { + ++Pos; + if ((Res = ParseServerAndShare(FileName, Pos))) + return Pos + Res; + return 0; + } + return Pos; + } + + if ((Res = ParseDrive(FileName, Pos))) + return Pos + Res; + + return Pos; +} + +std::string DirName(const std::string &FileName) { + size_t LocationLen = ParseLocation(FileName); + size_t DirLen = 0, Res; + while ((Res = ParseDir(FileName, LocationLen + DirLen))) + DirLen += Res; + size_t FileLen = ParseFileName(FileName, LocationLen + DirLen); + + if (LocationLen + DirLen + FileLen != FileName.size()) { + Printf("DirName() failed for \"%s\", invalid path.\n", FileName.c_str()); + exit(1); + } + + if (DirLen) { + --DirLen; // Remove trailing separator. + if (!FileLen) { // Path ended in separator. + assert(DirLen); + // Remove file name from Dir. + while (DirLen && !IsSeparator(FileName[LocationLen + DirLen - 1])) + --DirLen; + if (DirLen) // Remove trailing separator. + --DirLen; + } + } + + if (!LocationLen) { // Relative path. + if (!DirLen) + return "."; + return std::string(".\\").append(FileName, 0, DirLen); + } + + return FileName.substr(0, LocationLen + DirLen); +} + +std::string TmpDir() { + std::string Tmp; + Tmp.resize(MAX_PATH + 1); + DWORD Size = GetTempPathA(Tmp.size(), &Tmp[0]); + if (Size == 0) { + Printf("Couldn't get Tmp path.\n"); + exit(1); + } + Tmp.resize(Size); + return Tmp; +} + +bool IsInterestingCoverageFile(const std::string &FileName) { + if (FileName.find("Program Files") != std::string::npos) + return false; + if (FileName.find("compiler-rt\\lib\\") != std::string::npos) + return false; // sanitizer internal. + if (FileName == "<null>") + return false; + return true; +} + +void RawPrint(const char *Str) { + _write(2, Str, strlen(Str)); +} + +void MkDir(const std::string &Path) { + if (CreateDirectoryA(Path.c_str(), nullptr)) return; + Printf("CreateDirectoryA failed for %s (Error code: %lu).\n", Path.c_str(), + GetLastError()); +} + +void RmDir(const std::string &Path) { + if (RemoveDirectoryA(Path.c_str())) return; + Printf("RemoveDirectoryA failed for %s (Error code: %lu).\n", Path.c_str(), + GetLastError()); +} + +const std::string &getDevNull() { + static const std::string devNull = "NUL"; + return devNull; +} + +} // namespace fuzzer + +#endif // LIBFUZZER_WINDOWS diff --git a/tools/fuzzing/libfuzzer/FuzzerInterceptors.cpp b/tools/fuzzing/libfuzzer/FuzzerInterceptors.cpp new file mode 100644 index 0000000000..a1a64780de --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerInterceptors.cpp @@ -0,0 +1,235 @@ +//===-- FuzzerInterceptors.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Intercept certain libc functions to aid fuzzing. +// Linked only when other RTs that define their own interceptors are not linked. +//===----------------------------------------------------------------------===// + +#include "FuzzerPlatform.h" + +#if LIBFUZZER_LINUX + +#define GET_CALLER_PC() __builtin_return_address(0) + +#define PTR_TO_REAL(x) real_##x +#define REAL(x) __interception::PTR_TO_REAL(x) +#define FUNC_TYPE(x) x##_type +#define DEFINE_REAL(ret_type, func, ...) \ + typedef ret_type (*FUNC_TYPE(func))(__VA_ARGS__); \ + namespace __interception { \ + FUNC_TYPE(func) PTR_TO_REAL(func); \ + } + +#include <cassert> +#include <cstdint> +#include <dlfcn.h> // for dlsym() +#include <sanitizer/common_interface_defs.h> + +static void *getFuncAddr(const char *name, uintptr_t wrapper_addr) { + void *addr = dlsym(RTLD_NEXT, name); + if (!addr) { + // If the lookup using RTLD_NEXT failed, the sanitizer runtime library is + // later in the library search order than the DSO that we are trying to + // intercept, which means that we cannot intercept this function. We still + // want the address of the real definition, though, so look it up using + // RTLD_DEFAULT. + addr = dlsym(RTLD_DEFAULT, name); + + // In case `name' is not loaded, dlsym ends up finding the actual wrapper. + // We don't want to intercept the wrapper and have it point to itself. + if (reinterpret_cast<uintptr_t>(addr) == wrapper_addr) + addr = nullptr; + } + return addr; +} + +static int FuzzerInited = 0; +static bool FuzzerInitIsRunning; + +static void fuzzerInit(); + +static void ensureFuzzerInited() { + assert(!FuzzerInitIsRunning); + if (!FuzzerInited) { + fuzzerInit(); + } +} + +static int internal_strcmp_strncmp(const char *s1, const char *s2, bool strncmp, + size_t n) { + size_t i = 0; + while (true) { + if (strncmp) { + if (i == n) + break; + i++; + } + unsigned c1 = *s1; + unsigned c2 = *s2; + if (c1 != c2) + return (c1 < c2) ? -1 : 1; + if (c1 == 0) + break; + s1++; + s2++; + } + return 0; +} + +static int internal_strncmp(const char *s1, const char *s2, size_t n) { + return internal_strcmp_strncmp(s1, s2, true, n); +} + +static int internal_strcmp(const char *s1, const char *s2) { + return internal_strcmp_strncmp(s1, s2, false, 0); +} + +static int internal_memcmp(const void *s1, const void *s2, size_t n) { + const uint8_t *t1 = static_cast<const uint8_t *>(s1); + const uint8_t *t2 = static_cast<const uint8_t *>(s2); + for (size_t i = 0; i < n; ++i, ++t1, ++t2) + if (*t1 != *t2) + return *t1 < *t2 ? -1 : 1; + return 0; +} + +static size_t internal_strlen(const char *s) { + size_t i = 0; + while (s[i]) + i++; + return i; +} + +static char *internal_strstr(const char *haystack, const char *needle) { + // This is O(N^2), but we are not using it in hot places. + size_t len1 = internal_strlen(haystack); + size_t len2 = internal_strlen(needle); + if (len1 < len2) + return nullptr; + for (size_t pos = 0; pos <= len1 - len2; pos++) { + if (internal_memcmp(haystack + pos, needle, len2) == 0) + return const_cast<char *>(haystack) + pos; + } + return nullptr; +} + +extern "C" { + +DEFINE_REAL(int, bcmp, const void *, const void *, size_t) +DEFINE_REAL(int, memcmp, const void *, const void *, size_t) +DEFINE_REAL(int, strncmp, const char *, const char *, size_t) +DEFINE_REAL(int, strcmp, const char *, const char *) +DEFINE_REAL(int, strncasecmp, const char *, const char *, size_t) +DEFINE_REAL(int, strcasecmp, const char *, const char *) +DEFINE_REAL(char *, strstr, const char *, const char *) +DEFINE_REAL(char *, strcasestr, const char *, const char *) +DEFINE_REAL(void *, memmem, const void *, size_t, const void *, size_t) + +ATTRIBUTE_INTERFACE int bcmp(const char *s1, const char *s2, size_t n) { + if (!FuzzerInited) + return internal_memcmp(s1, s2, n); + int result = REAL(bcmp)(s1, s2, n); + __sanitizer_weak_hook_memcmp(GET_CALLER_PC(), s1, s2, n, result); + return result; +} + +ATTRIBUTE_INTERFACE int memcmp(const void *s1, const void *s2, size_t n) { + if (!FuzzerInited) + return internal_memcmp(s1, s2, n); + int result = REAL(memcmp)(s1, s2, n); + __sanitizer_weak_hook_memcmp(GET_CALLER_PC(), s1, s2, n, result); + return result; +} + +ATTRIBUTE_INTERFACE int strncmp(const char *s1, const char *s2, size_t n) { + if (!FuzzerInited) + return internal_strncmp(s1, s2, n); + int result = REAL(strncmp)(s1, s2, n); + __sanitizer_weak_hook_strncmp(GET_CALLER_PC(), s1, s2, n, result); + return result; +} + +ATTRIBUTE_INTERFACE int strcmp(const char *s1, const char *s2) { + if (!FuzzerInited) + return internal_strcmp(s1, s2); + int result = REAL(strcmp)(s1, s2); + __sanitizer_weak_hook_strcmp(GET_CALLER_PC(), s1, s2, result); + return result; +} + +ATTRIBUTE_INTERFACE int strncasecmp(const char *s1, const char *s2, size_t n) { + ensureFuzzerInited(); + int result = REAL(strncasecmp)(s1, s2, n); + __sanitizer_weak_hook_strncasecmp(GET_CALLER_PC(), s1, s2, n, result); + return result; +} + +ATTRIBUTE_INTERFACE int strcasecmp(const char *s1, const char *s2) { + ensureFuzzerInited(); + int result = REAL(strcasecmp)(s1, s2); + __sanitizer_weak_hook_strcasecmp(GET_CALLER_PC(), s1, s2, result); + return result; +} + +ATTRIBUTE_INTERFACE char *strstr(const char *s1, const char *s2) { + if (!FuzzerInited) + return internal_strstr(s1, s2); + char *result = REAL(strstr)(s1, s2); + __sanitizer_weak_hook_strstr(GET_CALLER_PC(), s1, s2, result); + return result; +} + +ATTRIBUTE_INTERFACE char *strcasestr(const char *s1, const char *s2) { + ensureFuzzerInited(); + char *result = REAL(strcasestr)(s1, s2); + __sanitizer_weak_hook_strcasestr(GET_CALLER_PC(), s1, s2, result); + return result; +} + +ATTRIBUTE_INTERFACE +void *memmem(const void *s1, size_t len1, const void *s2, size_t len2) { + ensureFuzzerInited(); + void *result = REAL(memmem)(s1, len1, s2, len2); + __sanitizer_weak_hook_memmem(GET_CALLER_PC(), s1, len1, s2, len2, result); + return result; +} + +__attribute__((section(".preinit_array"), + used)) static void (*__local_fuzzer_preinit)(void) = fuzzerInit; + +} // extern "C" + +static void fuzzerInit() { + assert(!FuzzerInitIsRunning); + if (FuzzerInited) + return; + FuzzerInitIsRunning = true; + + REAL(bcmp) = reinterpret_cast<memcmp_type>( + getFuncAddr("bcmp", reinterpret_cast<uintptr_t>(&bcmp))); + REAL(memcmp) = reinterpret_cast<memcmp_type>( + getFuncAddr("memcmp", reinterpret_cast<uintptr_t>(&memcmp))); + REAL(strncmp) = reinterpret_cast<strncmp_type>( + getFuncAddr("strncmp", reinterpret_cast<uintptr_t>(&strncmp))); + REAL(strcmp) = reinterpret_cast<strcmp_type>( + getFuncAddr("strcmp", reinterpret_cast<uintptr_t>(&strcmp))); + REAL(strncasecmp) = reinterpret_cast<strncasecmp_type>( + getFuncAddr("strncasecmp", reinterpret_cast<uintptr_t>(&strncasecmp))); + REAL(strcasecmp) = reinterpret_cast<strcasecmp_type>( + getFuncAddr("strcasecmp", reinterpret_cast<uintptr_t>(&strcasecmp))); + REAL(strstr) = reinterpret_cast<strstr_type>( + getFuncAddr("strstr", reinterpret_cast<uintptr_t>(&strstr))); + REAL(strcasestr) = reinterpret_cast<strcasestr_type>( + getFuncAddr("strcasestr", reinterpret_cast<uintptr_t>(&strcasestr))); + REAL(memmem) = reinterpret_cast<memmem_type>( + getFuncAddr("memmem", reinterpret_cast<uintptr_t>(&memmem))); + + FuzzerInitIsRunning = false; + FuzzerInited = 1; +} + +#endif diff --git a/tools/fuzzing/libfuzzer/FuzzerInterface.h b/tools/fuzzing/libfuzzer/FuzzerInterface.h new file mode 100644 index 0000000000..4f62822eac --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerInterface.h @@ -0,0 +1,79 @@ +//===- FuzzerInterface.h - Interface header for the Fuzzer ------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Define the interface between libFuzzer and the library being tested. +//===----------------------------------------------------------------------===// + +// NOTE: the libFuzzer interface is thin and in the majority of cases +// you should not include this file into your target. In 95% of cases +// all you need is to define the following function in your file: +// extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); + +// WARNING: keep the interface in C. + +#ifndef LLVM_FUZZER_INTERFACE_H +#define LLVM_FUZZER_INTERFACE_H + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// Define FUZZER_INTERFACE_VISIBILITY to set default visibility in a way that +// doesn't break MSVC. +#if defined(_WIN32) +#define FUZZER_INTERFACE_VISIBILITY __declspec(dllexport) +#else +#define FUZZER_INTERFACE_VISIBILITY __attribute__((visibility("default"))) +#endif + +// Mandatory user-provided target function. +// Executes the code under test with [Data, Data+Size) as the input. +// libFuzzer will invoke this function *many* times with different inputs. +// Must return 0. +FUZZER_INTERFACE_VISIBILITY int +LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); + +// Optional user-provided initialization function. +// If provided, this function will be called by libFuzzer once at startup. +// It may read and modify argc/argv. +// Must return 0. +FUZZER_INTERFACE_VISIBILITY int LLVMFuzzerInitialize(int *argc, char ***argv); + +// Optional user-provided custom mutator. +// Mutates raw data in [Data, Data+Size) inplace. +// Returns the new size, which is not greater than MaxSize. +// Given the same Seed produces the same mutation. +FUZZER_INTERFACE_VISIBILITY size_t +LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size, size_t MaxSize, + unsigned int Seed); + +// Optional user-provided custom cross-over function. +// Combines pieces of Data1 & Data2 together into Out. +// Returns the new size, which is not greater than MaxOutSize. +// Should produce the same mutation given the same Seed. +FUZZER_INTERFACE_VISIBILITY size_t +LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1, + const uint8_t *Data2, size_t Size2, uint8_t *Out, + size_t MaxOutSize, unsigned int Seed); + +// Experimental, may go away in future. +// libFuzzer-provided function to be used inside LLVMFuzzerCustomMutator. +// Mutates raw data in [Data, Data+Size) inplace. +// Returns the new size, which is not greater than MaxSize. +FUZZER_INTERFACE_VISIBILITY size_t +LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); + +#undef FUZZER_INTERFACE_VISIBILITY + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // LLVM_FUZZER_INTERFACE_H diff --git a/tools/fuzzing/libfuzzer/FuzzerInternal.h b/tools/fuzzing/libfuzzer/FuzzerInternal.h new file mode 100644 index 0000000000..cc2650b58e --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerInternal.h @@ -0,0 +1,175 @@ +//===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Define the main class fuzzer::Fuzzer and most functions. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_INTERNAL_H +#define LLVM_FUZZER_INTERNAL_H + +#include "FuzzerDataFlowTrace.h" +#include "FuzzerDefs.h" +#include "FuzzerExtFunctions.h" +#include "FuzzerInterface.h" +#include "FuzzerOptions.h" +#include "FuzzerSHA1.h" +#include "FuzzerValueBitMap.h" +#include <algorithm> +#include <atomic> +#include <chrono> +#include <climits> +#include <cstdlib> +#include <string.h> + +namespace fuzzer { + +using namespace std::chrono; + +class Fuzzer { +public: + + Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, + FuzzingOptions Options); + ~Fuzzer(); + int Loop(Vector<SizedFile> &CorporaFiles); + int ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles); + void MinimizeCrashLoop(const Unit &U); + void RereadOutputCorpus(size_t MaxSize); + + size_t secondsSinceProcessStartUp() { + return duration_cast<seconds>(system_clock::now() - ProcessStartTime) + .count(); + } + + bool TimedOut() { + return Options.MaxTotalTimeSec > 0 && + secondsSinceProcessStartUp() > + static_cast<size_t>(Options.MaxTotalTimeSec); + } + + size_t execPerSec() { + size_t Seconds = secondsSinceProcessStartUp(); + return Seconds ? TotalNumberOfRuns / Seconds : 0; + } + + size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } + + static void StaticAlarmCallback(); + static void StaticCrashSignalCallback(); + static void StaticExitCallback(); + static void StaticInterruptCallback(); + static void StaticFileSizeExceedCallback(); + static void StaticGracefulExitCallback(); + + static void GracefullyExit(); + static bool isGracefulExitRequested(); + + int ExecuteCallback(const uint8_t *Data, size_t Size); + bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, + InputInfo *II = nullptr, bool *FoundUniqFeatures = nullptr); + + // Merge Corpora[1:] into Corpora[0]. + void Merge(const Vector<std::string> &Corpora); + int CrashResistantMergeInternalStep(const std::string &ControlFilePath); + MutationDispatcher &GetMD() { return MD; } + void PrintFinalStats(); + void SetMaxInputLen(size_t MaxInputLen); + void SetMaxMutationLen(size_t MaxMutationLen); + void RssLimitCallback(); + + bool InFuzzingThread() const { return IsMyThread; } + size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const; + void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, + bool DuringInitialCorpusExecution); + + void HandleMalloc(size_t Size); + static bool MaybeExitGracefully(); + std::string WriteToOutputCorpus(const Unit &U); + +private: + void AlarmCallback(); + void CrashCallback(); + void ExitCallback(); + void CrashOnOverwrittenData(); + void InterruptCallback(); + bool MutateAndTestOne(); + void PurgeAllocator(); + void ReportNewCoverage(InputInfo *II, const Unit &U); + void PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size); + void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); + void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0, + size_t Features = 0); + void PrintStatusForNewUnit(const Unit &U, const char *Text); + void CheckExitOnSrcPosOrItem(); + + static void StaticDeathCallback(); + void DumpCurrentUnit(const char *Prefix); + void DeathCallback(); + + void AllocateCurrentUnitData(); + uint8_t *CurrentUnitData = nullptr; + std::atomic<size_t> CurrentUnitSize; + uint8_t BaseSha1[kSHA1NumBytes]; // Checksum of the base unit. + + bool GracefulExitRequested = false; + + size_t TotalNumberOfRuns = 0; + size_t NumberOfNewUnitsAdded = 0; + + size_t LastCorpusUpdateRun = 0; + + bool HasMoreMallocsThanFrees = false; + size_t NumberOfLeakDetectionAttempts = 0; + + system_clock::time_point LastAllocatorPurgeAttemptTime = system_clock::now(); + + UserCallback CB; + InputCorpus &Corpus; + MutationDispatcher &MD; + FuzzingOptions Options; + DataFlowTrace DFT; + + system_clock::time_point ProcessStartTime = system_clock::now(); + system_clock::time_point UnitStartTime, UnitStopTime; + long TimeOfLongestUnitInSeconds = 0; + long EpochOfLastReadOfOutputCorpus = 0; + + size_t MaxInputLen = 0; + size_t MaxMutationLen = 0; + size_t TmpMaxMutationLen = 0; + + Vector<uint32_t> UniqFeatureSetTmp; + + // Need to know our own thread. + static thread_local bool IsMyThread; +}; + +struct ScopedEnableMsanInterceptorChecks { + ScopedEnableMsanInterceptorChecks() { + if (EF->__msan_scoped_enable_interceptor_checks) + EF->__msan_scoped_enable_interceptor_checks(); + } + ~ScopedEnableMsanInterceptorChecks() { + if (EF->__msan_scoped_disable_interceptor_checks) + EF->__msan_scoped_disable_interceptor_checks(); + } +}; + +struct ScopedDisableMsanInterceptorChecks { + ScopedDisableMsanInterceptorChecks() { + if (EF->__msan_scoped_disable_interceptor_checks) + EF->__msan_scoped_disable_interceptor_checks(); + } + ~ScopedDisableMsanInterceptorChecks() { + if (EF->__msan_scoped_enable_interceptor_checks) + EF->__msan_scoped_enable_interceptor_checks(); + } +}; + +} // namespace fuzzer + +#endif // LLVM_FUZZER_INTERNAL_H diff --git a/tools/fuzzing/libfuzzer/FuzzerLoop.cpp b/tools/fuzzing/libfuzzer/FuzzerLoop.cpp new file mode 100644 index 0000000000..e7dfc187db --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerLoop.cpp @@ -0,0 +1,901 @@ +//===- FuzzerLoop.cpp - Fuzzer's main loop --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Fuzzer's main loop. +//===----------------------------------------------------------------------===// + +#include "FuzzerCorpus.h" +#include "FuzzerIO.h" +#include "FuzzerInternal.h" +#include "FuzzerMutate.h" +#include "FuzzerPlatform.h" +#include "FuzzerRandom.h" +#include "FuzzerTracePC.h" +#include <algorithm> +#include <cstring> +#include <memory> +#include <mutex> +#include <set> + +#if defined(__has_include) +#if __has_include(<sanitizer / lsan_interface.h>) +#include <sanitizer/lsan_interface.h> +#endif +#endif + +#define NO_SANITIZE_MEMORY +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#undef NO_SANITIZE_MEMORY +#define NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory)) +#endif +#endif + +namespace fuzzer { +static const size_t kMaxUnitSizeToPrint = 256; + +thread_local bool Fuzzer::IsMyThread; + +bool RunningUserCallback = false; + +// Only one Fuzzer per process. +static Fuzzer *F; + +// Leak detection is expensive, so we first check if there were more mallocs +// than frees (using the sanitizer malloc hooks) and only then try to call lsan. +struct MallocFreeTracer { + void Start(int TraceLevel) { + this->TraceLevel = TraceLevel; + if (TraceLevel) + Printf("MallocFreeTracer: START\n"); + Mallocs = 0; + Frees = 0; + } + // Returns true if there were more mallocs than frees. + bool Stop() { + if (TraceLevel) + Printf("MallocFreeTracer: STOP %zd %zd (%s)\n", Mallocs.load(), + Frees.load(), Mallocs == Frees ? "same" : "DIFFERENT"); + bool Result = Mallocs > Frees; + Mallocs = 0; + Frees = 0; + TraceLevel = 0; + return Result; + } + std::atomic<size_t> Mallocs; + std::atomic<size_t> Frees; + int TraceLevel = 0; + + std::recursive_mutex TraceMutex; + bool TraceDisabled = false; +}; + +static MallocFreeTracer AllocTracer; + +// Locks printing and avoids nested hooks triggered from mallocs/frees in +// sanitizer. +class TraceLock { +public: + TraceLock() : Lock(AllocTracer.TraceMutex) { + AllocTracer.TraceDisabled = !AllocTracer.TraceDisabled; + } + ~TraceLock() { AllocTracer.TraceDisabled = !AllocTracer.TraceDisabled; } + + bool IsDisabled() const { + // This is already inverted value. + return !AllocTracer.TraceDisabled; + } + +private: + std::lock_guard<std::recursive_mutex> Lock; +}; + +ATTRIBUTE_NO_SANITIZE_MEMORY +void MallocHook(const volatile void *ptr, size_t size) { + size_t N = AllocTracer.Mallocs++; + F->HandleMalloc(size); + if (int TraceLevel = AllocTracer.TraceLevel) { + TraceLock Lock; + if (Lock.IsDisabled()) + return; + Printf("MALLOC[%zd] %p %zd\n", N, ptr, size); + if (TraceLevel >= 2 && EF) + PrintStackTrace(); + } +} + +ATTRIBUTE_NO_SANITIZE_MEMORY +void FreeHook(const volatile void *ptr) { + size_t N = AllocTracer.Frees++; + if (int TraceLevel = AllocTracer.TraceLevel) { + TraceLock Lock; + if (Lock.IsDisabled()) + return; + Printf("FREE[%zd] %p\n", N, ptr); + if (TraceLevel >= 2 && EF) + PrintStackTrace(); + } +} + +// Crash on a single malloc that exceeds the rss limit. +void Fuzzer::HandleMalloc(size_t Size) { + if (!Options.MallocLimitMb || (Size >> 20) < (size_t)Options.MallocLimitMb) + return; + Printf("==%d== ERROR: libFuzzer: out-of-memory (malloc(%zd))\n", GetPid(), + Size); + Printf(" To change the out-of-memory limit use -rss_limit_mb=<N>\n\n"); + PrintStackTrace(); + DumpCurrentUnit("oom-"); + Printf("SUMMARY: libFuzzer: out-of-memory\n"); + PrintFinalStats(); + _Exit(Options.OOMExitCode); // Stop right now. +} + +Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, + FuzzingOptions Options) + : CB(CB), Corpus(Corpus), MD(MD), Options(Options) { + if (EF->__sanitizer_set_death_callback) + EF->__sanitizer_set_death_callback(StaticDeathCallback); + assert(!F); + F = this; + TPC.ResetMaps(); + IsMyThread = true; + if (Options.DetectLeaks && EF->__sanitizer_install_malloc_and_free_hooks) + EF->__sanitizer_install_malloc_and_free_hooks(MallocHook, FreeHook); + TPC.SetUseCounters(Options.UseCounters); + TPC.SetUseValueProfileMask(Options.UseValueProfile); + + if (Options.Verbosity) + TPC.PrintModuleInfo(); + if (!Options.OutputCorpus.empty() && Options.ReloadIntervalSec) + EpochOfLastReadOfOutputCorpus = GetEpoch(Options.OutputCorpus); + MaxInputLen = MaxMutationLen = Options.MaxLen; + TmpMaxMutationLen = 0; // Will be set once we load the corpus. + AllocateCurrentUnitData(); + CurrentUnitSize = 0; + memset(BaseSha1, 0, sizeof(BaseSha1)); +} + +Fuzzer::~Fuzzer() {} + +void Fuzzer::AllocateCurrentUnitData() { + if (CurrentUnitData || MaxInputLen == 0) + return; + CurrentUnitData = new uint8_t[MaxInputLen]; +} + +void Fuzzer::StaticDeathCallback() { + assert(F); + F->DeathCallback(); +} + +void Fuzzer::DumpCurrentUnit(const char *Prefix) { + if (!CurrentUnitData) + return; // Happens when running individual inputs. + ScopedDisableMsanInterceptorChecks S; + MD.PrintMutationSequence(); + Printf("; base unit: %s\n", Sha1ToString(BaseSha1).c_str()); + size_t UnitSize = CurrentUnitSize; + if (UnitSize <= kMaxUnitSizeToPrint) { + PrintHexArray(CurrentUnitData, UnitSize, "\n"); + PrintASCII(CurrentUnitData, UnitSize, "\n"); + } + WriteUnitToFileWithPrefix({CurrentUnitData, CurrentUnitData + UnitSize}, + Prefix); +} + +NO_SANITIZE_MEMORY +void Fuzzer::DeathCallback() { + DumpCurrentUnit("crash-"); + PrintFinalStats(); +} + +void Fuzzer::StaticAlarmCallback() { + assert(F); + F->AlarmCallback(); +} + +void Fuzzer::StaticCrashSignalCallback() { + assert(F); + F->CrashCallback(); +} + +void Fuzzer::StaticExitCallback() { + assert(F); + F->ExitCallback(); +} + +void Fuzzer::StaticInterruptCallback() { + assert(F); + F->InterruptCallback(); +} + +void Fuzzer::StaticGracefulExitCallback() { + assert(F); + F->GracefulExitRequested = true; + Printf("INFO: signal received, trying to exit gracefully\n"); +} + +void Fuzzer::StaticFileSizeExceedCallback() { + Printf("==%lu== ERROR: libFuzzer: file size exceeded\n", GetPid()); + exit(1); +} + +void Fuzzer::CrashCallback() { + if (EF->__sanitizer_acquire_crash_state && + !EF->__sanitizer_acquire_crash_state()) + return; + Printf("==%lu== ERROR: libFuzzer: deadly signal\n", GetPid()); + PrintStackTrace(); + Printf("NOTE: libFuzzer has rudimentary signal handlers.\n" + " Combine libFuzzer with AddressSanitizer or similar for better " + "crash reports.\n"); + Printf("SUMMARY: libFuzzer: deadly signal\n"); + DumpCurrentUnit("crash-"); + PrintFinalStats(); + _Exit(Options.ErrorExitCode); // Stop right now. +} + +void Fuzzer::ExitCallback() { + if (!RunningUserCallback) + return; // This exit did not come from the user callback + if (EF->__sanitizer_acquire_crash_state && + !EF->__sanitizer_acquire_crash_state()) + return; + Printf("==%lu== ERROR: libFuzzer: fuzz target exited\n", GetPid()); + PrintStackTrace(); + Printf("SUMMARY: libFuzzer: fuzz target exited\n"); + DumpCurrentUnit("crash-"); + PrintFinalStats(); + _Exit(Options.ErrorExitCode); +} + +bool Fuzzer::MaybeExitGracefully() { + if (!F->GracefulExitRequested) return false; + Printf("==%lu== INFO: libFuzzer: exiting as requested\n", GetPid()); + RmDirRecursive(TempPath("FuzzWithFork", ".dir")); + F->PrintFinalStats(); + return true; +} + +void Fuzzer::GracefullyExit() { + F->GracefulExitRequested = true; +} + +bool Fuzzer::isGracefulExitRequested() { + return F->GracefulExitRequested; +} + +void Fuzzer::InterruptCallback() { + Printf("==%lu== libFuzzer: run interrupted; exiting\n", GetPid()); + PrintFinalStats(); + ScopedDisableMsanInterceptorChecks S; // RmDirRecursive may call opendir(). + RmDirRecursive(TempPath("FuzzWithFork", ".dir")); + // Stop right now, don't perform any at-exit actions. + _Exit(Options.InterruptExitCode); +} + +NO_SANITIZE_MEMORY +void Fuzzer::AlarmCallback() { + assert(Options.UnitTimeoutSec > 0); + // In Windows and Fuchsia, Alarm callback is executed by a different thread. + // NetBSD's current behavior needs this change too. +#if !LIBFUZZER_WINDOWS && !LIBFUZZER_NETBSD && !LIBFUZZER_FUCHSIA + if (!InFuzzingThread()) + return; +#endif + if (!RunningUserCallback) + return; // We have not started running units yet. + size_t Seconds = + duration_cast<seconds>(system_clock::now() - UnitStartTime).count(); + if (Seconds == 0) + return; + if (Options.Verbosity >= 2) + Printf("AlarmCallback %zd\n", Seconds); + if (Seconds >= (size_t)Options.UnitTimeoutSec) { + if (EF->__sanitizer_acquire_crash_state && + !EF->__sanitizer_acquire_crash_state()) + return; + Printf("ALARM: working on the last Unit for %zd seconds\n", Seconds); + Printf(" and the timeout value is %d (use -timeout=N to change)\n", + Options.UnitTimeoutSec); + DumpCurrentUnit("timeout-"); + Printf("==%lu== ERROR: libFuzzer: timeout after %d seconds\n", GetPid(), + Seconds); + PrintStackTrace(); + Printf("SUMMARY: libFuzzer: timeout\n"); + PrintFinalStats(); + _Exit(Options.TimeoutExitCode); // Stop right now. + } +} + +void Fuzzer::RssLimitCallback() { + if (EF->__sanitizer_acquire_crash_state && + !EF->__sanitizer_acquire_crash_state()) + return; + Printf( + "==%lu== ERROR: libFuzzer: out-of-memory (used: %zdMb; limit: %zdMb)\n", + GetPid(), GetPeakRSSMb(), Options.RssLimitMb); + Printf(" To change the out-of-memory limit use -rss_limit_mb=<N>\n\n"); + PrintMemoryProfile(); + DumpCurrentUnit("oom-"); + Printf("SUMMARY: libFuzzer: out-of-memory\n"); + PrintFinalStats(); + _Exit(Options.OOMExitCode); // Stop right now. +} + +void Fuzzer::PrintStats(const char *Where, const char *End, size_t Units, + size_t Features) { + size_t ExecPerSec = execPerSec(); + if (!Options.Verbosity) + return; + Printf("#%zd\t%s", TotalNumberOfRuns, Where); + if (size_t N = TPC.GetTotalPCCoverage()) + Printf(" cov: %zd", N); + if (size_t N = Features ? Features : Corpus.NumFeatures()) + Printf(" ft: %zd", N); + if (!Corpus.empty()) { + Printf(" corp: %zd", Corpus.NumActiveUnits()); + if (size_t N = Corpus.SizeInBytes()) { + if (N < (1 << 14)) + Printf("/%zdb", N); + else if (N < (1 << 24)) + Printf("/%zdKb", N >> 10); + else + Printf("/%zdMb", N >> 20); + } + if (size_t FF = Corpus.NumInputsThatTouchFocusFunction()) + Printf(" focus: %zd", FF); + } + if (TmpMaxMutationLen) + Printf(" lim: %zd", TmpMaxMutationLen); + if (Units) + Printf(" units: %zd", Units); + + Printf(" exec/s: %zd", ExecPerSec); + Printf(" rss: %zdMb", GetPeakRSSMb()); + Printf("%s", End); +} + +void Fuzzer::PrintFinalStats() { + if (Options.PrintCoverage) + TPC.PrintCoverage(); + if (Options.PrintCorpusStats) + Corpus.PrintStats(); + if (!Options.PrintFinalStats) + return; + size_t ExecPerSec = execPerSec(); + Printf("stat::number_of_executed_units: %zd\n", TotalNumberOfRuns); + Printf("stat::average_exec_per_sec: %zd\n", ExecPerSec); + Printf("stat::new_units_added: %zd\n", NumberOfNewUnitsAdded); + Printf("stat::slowest_unit_time_sec: %zd\n", TimeOfLongestUnitInSeconds); + Printf("stat::peak_rss_mb: %zd\n", GetPeakRSSMb()); +} + +void Fuzzer::SetMaxInputLen(size_t MaxInputLen) { + assert(this->MaxInputLen == 0); // Can only reset MaxInputLen from 0 to non-0. + assert(MaxInputLen); + this->MaxInputLen = MaxInputLen; + this->MaxMutationLen = MaxInputLen; + AllocateCurrentUnitData(); + Printf("INFO: -max_len is not provided; " + "libFuzzer will not generate inputs larger than %zd bytes\n", + MaxInputLen); +} + +void Fuzzer::SetMaxMutationLen(size_t MaxMutationLen) { + assert(MaxMutationLen && MaxMutationLen <= MaxInputLen); + this->MaxMutationLen = MaxMutationLen; +} + +void Fuzzer::CheckExitOnSrcPosOrItem() { + if (!Options.ExitOnSrcPos.empty()) { + static auto *PCsSet = new Set<uintptr_t>; + auto HandlePC = [&](const TracePC::PCTableEntry *TE) { + if (!PCsSet->insert(TE->PC).second) + return; + std::string Descr = DescribePC("%F %L", TE->PC + 1); + if (Descr.find(Options.ExitOnSrcPos) != std::string::npos) { + Printf("INFO: found line matching '%s', exiting.\n", + Options.ExitOnSrcPos.c_str()); + _Exit(0); + } + }; + TPC.ForEachObservedPC(HandlePC); + } + if (!Options.ExitOnItem.empty()) { + if (Corpus.HasUnit(Options.ExitOnItem)) { + Printf("INFO: found item with checksum '%s', exiting.\n", + Options.ExitOnItem.c_str()); + _Exit(0); + } + } +} + +void Fuzzer::RereadOutputCorpus(size_t MaxSize) { + if (Options.OutputCorpus.empty() || !Options.ReloadIntervalSec) + return; + Vector<Unit> AdditionalCorpus; + ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus, + &EpochOfLastReadOfOutputCorpus, MaxSize, + /*ExitOnError*/ false); + if (Options.Verbosity >= 2) + Printf("Reload: read %zd new units.\n", AdditionalCorpus.size()); + bool Reloaded = false; + for (auto &U : AdditionalCorpus) { + if (U.size() > MaxSize) + U.resize(MaxSize); + if (!Corpus.HasUnit(U)) { + if (RunOne(U.data(), U.size())) { + CheckExitOnSrcPosOrItem(); + Reloaded = true; + } + } + } + if (Reloaded) + PrintStats("RELOAD"); +} + +void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) { + auto TimeOfUnit = + duration_cast<seconds>(UnitStopTime - UnitStartTime).count(); + if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && + secondsSinceProcessStartUp() >= 2) + PrintStats("pulse "); + if (TimeOfUnit > TimeOfLongestUnitInSeconds * 1.1 && + TimeOfUnit >= Options.ReportSlowUnits) { + TimeOfLongestUnitInSeconds = TimeOfUnit; + Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds); + WriteUnitToFileWithPrefix({Data, Data + Size}, "slow-unit-"); + } +} + +static void WriteFeatureSetToFile(const std::string &FeaturesDir, + const std::string &FileName, + const Vector<uint32_t> &FeatureSet) { + if (FeaturesDir.empty() || FeatureSet.empty()) return; + WriteToFile(reinterpret_cast<const uint8_t *>(FeatureSet.data()), + FeatureSet.size() * sizeof(FeatureSet[0]), + DirPlusFile(FeaturesDir, FileName)); +} + +static void RenameFeatureSetFile(const std::string &FeaturesDir, + const std::string &OldFile, + const std::string &NewFile) { + if (FeaturesDir.empty()) return; + RenameFile(DirPlusFile(FeaturesDir, OldFile), + DirPlusFile(FeaturesDir, NewFile)); +} + +bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, + InputInfo *II, bool *FoundUniqFeatures) { + if (!Size) + return false; + + if (ExecuteCallback(Data, Size) > 0) { + return false; + } + + UniqFeatureSetTmp.clear(); + size_t FoundUniqFeaturesOfII = 0; + size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); + TPC.CollectFeatures([&](size_t Feature) { + if (Corpus.AddFeature(Feature, Size, Options.Shrink)) + UniqFeatureSetTmp.push_back(Feature); + if (Options.Entropic) + Corpus.UpdateFeatureFrequency(II, Feature); + if (Options.ReduceInputs && II) + if (std::binary_search(II->UniqFeatureSet.begin(), + II->UniqFeatureSet.end(), Feature)) + FoundUniqFeaturesOfII++; + }); + if (FoundUniqFeatures) + *FoundUniqFeatures = FoundUniqFeaturesOfII; + PrintPulseAndReportSlowInput(Data, Size); + size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore; + if (NumNewFeatures) { + TPC.UpdateObservedPCs(); + auto NewII = Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, + MayDeleteFile, TPC.ObservedFocusFunction(), + UniqFeatureSetTmp, DFT, II); + WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1), + NewII->UniqFeatureSet); + return true; + } + if (II && FoundUniqFeaturesOfII && + II->DataFlowTraceForFocusFunction.empty() && + FoundUniqFeaturesOfII == II->UniqFeatureSet.size() && + II->U.size() > Size) { + auto OldFeaturesFile = Sha1ToString(II->Sha1); + Corpus.Replace(II, {Data, Data + Size}); + RenameFeatureSetFile(Options.FeaturesDir, OldFeaturesFile, + Sha1ToString(II->Sha1)); + return true; + } + return false; +} + +size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const { + assert(InFuzzingThread()); + *Data = CurrentUnitData; + return CurrentUnitSize; +} + +void Fuzzer::CrashOnOverwrittenData() { + Printf("==%d== ERROR: libFuzzer: fuzz target overwrites its const input\n", + GetPid()); + PrintStackTrace(); + Printf("SUMMARY: libFuzzer: overwrites-const-input\n"); + DumpCurrentUnit("crash-"); + PrintFinalStats(); + _Exit(Options.ErrorExitCode); // Stop right now. +} + +// Compare two arrays, but not all bytes if the arrays are large. +static bool LooseMemeq(const uint8_t *A, const uint8_t *B, size_t Size) { + const size_t Limit = 64; + if (Size <= 64) + return !memcmp(A, B, Size); + // Compare first and last Limit/2 bytes. + return !memcmp(A, B, Limit / 2) && + !memcmp(A + Size - Limit / 2, B + Size - Limit / 2, Limit / 2); +} + +int Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { + TPC.RecordInitialStack(); + TotalNumberOfRuns++; + assert(InFuzzingThread()); + // We copy the contents of Unit into a separate heap buffer + // so that we reliably find buffer overflows in it. + uint8_t *DataCopy = new uint8_t[Size]; + memcpy(DataCopy, Data, Size); + if (EF->__msan_unpoison) + EF->__msan_unpoison(DataCopy, Size); + if (EF->__msan_unpoison_param) + EF->__msan_unpoison_param(2); + if (CurrentUnitData && CurrentUnitData != Data) + memcpy(CurrentUnitData, Data, Size); + CurrentUnitSize = Size; + int Res = 0; + { + ScopedEnableMsanInterceptorChecks S; + AllocTracer.Start(Options.TraceMalloc); + UnitStartTime = system_clock::now(); + TPC.ResetMaps(); + RunningUserCallback = true; + Res = CB(DataCopy, Size); + RunningUserCallback = false; + UnitStopTime = system_clock::now(); + assert(Res >= 0); + HasMoreMallocsThanFrees = AllocTracer.Stop(); + } + if (!LooseMemeq(DataCopy, Data, Size)) + CrashOnOverwrittenData(); + CurrentUnitSize = 0; + delete[] DataCopy; + return Res; +} + +std::string Fuzzer::WriteToOutputCorpus(const Unit &U) { + if (Options.OnlyASCII) + assert(IsASCII(U)); + if (Options.OutputCorpus.empty()) + return ""; + std::string Path = DirPlusFile(Options.OutputCorpus, Hash(U)); + WriteToFile(U, Path); + if (Options.Verbosity >= 2) + Printf("Written %zd bytes to %s\n", U.size(), Path.c_str()); + return Path; +} + +void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { + if (!Options.SaveArtifacts) + return; + std::string Path = Options.ArtifactPrefix + Prefix + Hash(U); + if (!Options.ExactArtifactPath.empty()) + Path = Options.ExactArtifactPath; // Overrides ArtifactPrefix. + WriteToFile(U, Path); + Printf("artifact_prefix='%s'; Test unit written to %s\n", + Options.ArtifactPrefix.c_str(), Path.c_str()); + if (U.size() <= kMaxUnitSizeToPrint) + Printf("Base64: %s\n", Base64(U).c_str()); +} + +void Fuzzer::PrintStatusForNewUnit(const Unit &U, const char *Text) { + if (!Options.PrintNEW) + return; + PrintStats(Text, ""); + if (Options.Verbosity) { + Printf(" L: %zd/%zd ", U.size(), Corpus.MaxInputSize()); + MD.PrintMutationSequence(); + Printf("\n"); + } +} + +void Fuzzer::ReportNewCoverage(InputInfo *II, const Unit &U) { + II->NumSuccessfullMutations++; + MD.RecordSuccessfulMutationSequence(); + PrintStatusForNewUnit(U, II->Reduced ? "REDUCE" : "NEW "); + WriteToOutputCorpus(U); + NumberOfNewUnitsAdded++; + CheckExitOnSrcPosOrItem(); // Check only after the unit is saved to corpus. + LastCorpusUpdateRun = TotalNumberOfRuns; +} + +// Tries detecting a memory leak on the particular input that we have just +// executed before calling this function. +void Fuzzer::TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, + bool DuringInitialCorpusExecution) { + if (!HasMoreMallocsThanFrees) + return; // mallocs==frees, a leak is unlikely. + if (!Options.DetectLeaks) + return; + if (!DuringInitialCorpusExecution && + TotalNumberOfRuns >= Options.MaxNumberOfRuns) + return; + if (!&(EF->__lsan_enable) || !&(EF->__lsan_disable) || + !(EF->__lsan_do_recoverable_leak_check)) + return; // No lsan. + // Run the target once again, but with lsan disabled so that if there is + // a real leak we do not report it twice. + EF->__lsan_disable(); + ExecuteCallback(Data, Size); + EF->__lsan_enable(); + if (!HasMoreMallocsThanFrees) + return; // a leak is unlikely. + if (NumberOfLeakDetectionAttempts++ > 1000) { + Options.DetectLeaks = false; + Printf("INFO: libFuzzer disabled leak detection after every mutation.\n" + " Most likely the target function accumulates allocated\n" + " memory in a global state w/o actually leaking it.\n" + " You may try running this binary with -trace_malloc=[12]" + " to get a trace of mallocs and frees.\n" + " If LeakSanitizer is enabled in this process it will still\n" + " run on the process shutdown.\n"); + return; + } + // Now perform the actual lsan pass. This is expensive and we must ensure + // we don't call it too often. + if (EF->__lsan_do_recoverable_leak_check()) { // Leak is found, report it. + if (DuringInitialCorpusExecution) + Printf("\nINFO: a leak has been found in the initial corpus.\n\n"); + Printf("INFO: to ignore leaks on libFuzzer side use -detect_leaks=0.\n\n"); + CurrentUnitSize = Size; + DumpCurrentUnit("leak-"); + PrintFinalStats(); + _Exit(Options.ErrorExitCode); // not exit() to disable lsan further on. + } +} + +bool Fuzzer::MutateAndTestOne() { + MD.StartMutationSequence(); + + auto &II = Corpus.ChooseUnitToMutate(MD.GetRand()); + if (Options.DoCrossOver) + MD.SetCrossOverWith(&Corpus.ChooseUnitToMutate(MD.GetRand()).U); + const auto &U = II.U; + memcpy(BaseSha1, II.Sha1, sizeof(BaseSha1)); + assert(CurrentUnitData); + size_t Size = U.size(); + assert(Size <= MaxInputLen && "Oversized Unit"); + memcpy(CurrentUnitData, U.data(), Size); + + assert(MaxMutationLen > 0); + + size_t CurrentMaxMutationLen = + Min(MaxMutationLen, Max(U.size(), TmpMaxMutationLen)); + assert(CurrentMaxMutationLen > 0); + + for (int i = 0; i < Options.MutateDepth; i++) { + if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) + break; + if (MaybeExitGracefully()) return true; + size_t NewSize = 0; + if (II.HasFocusFunction && !II.DataFlowTraceForFocusFunction.empty() && + Size <= CurrentMaxMutationLen) + NewSize = MD.MutateWithMask(CurrentUnitData, Size, Size, + II.DataFlowTraceForFocusFunction); + + // If MutateWithMask either failed or wasn't called, call default Mutate. + if (!NewSize) + NewSize = MD.Mutate(CurrentUnitData, Size, CurrentMaxMutationLen); + + if (!NewSize) + continue; + + assert(NewSize > 0 && "Mutator returned empty unit"); + assert(NewSize <= CurrentMaxMutationLen && "Mutator return oversized unit"); + Size = NewSize; + II.NumExecutedMutations++; + Corpus.IncrementNumExecutedMutations(); + + bool FoundUniqFeatures = false; + bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II, + &FoundUniqFeatures); + TryDetectingAMemoryLeak(CurrentUnitData, Size, + /*DuringInitialCorpusExecution*/ false); + if (NewCov) { + ReportNewCoverage(&II, {CurrentUnitData, CurrentUnitData + Size}); + break; // We will mutate this input more in the next rounds. + } + if (Options.ReduceDepth && !FoundUniqFeatures) + break; + } + + II.NeedsEnergyUpdate = true; + return false; +} + +void Fuzzer::PurgeAllocator() { + if (Options.PurgeAllocatorIntervalSec < 0 || !EF->__sanitizer_purge_allocator) + return; + if (duration_cast<seconds>(system_clock::now() - + LastAllocatorPurgeAttemptTime) + .count() < Options.PurgeAllocatorIntervalSec) + return; + + if (Options.RssLimitMb <= 0 || + GetPeakRSSMb() > static_cast<size_t>(Options.RssLimitMb) / 2) + EF->__sanitizer_purge_allocator(); + + LastAllocatorPurgeAttemptTime = system_clock::now(); +} + +int Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) { + const size_t kMaxSaneLen = 1 << 20; + const size_t kMinDefaultLen = 4096; + size_t MaxSize = 0; + size_t MinSize = -1; + size_t TotalSize = 0; + for (auto &File : CorporaFiles) { + MaxSize = Max(File.Size, MaxSize); + MinSize = Min(File.Size, MinSize); + TotalSize += File.Size; + } + if (Options.MaxLen == 0) + SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxSize), kMaxSaneLen)); + assert(MaxInputLen > 0); + + // Test the callback with empty input and never try it again. + uint8_t dummy = 0; + ExecuteCallback(&dummy, 0); + + if (CorporaFiles.empty()) { + Printf("INFO: A corpus is not provided, starting from an empty corpus\n"); + Unit U({'\n'}); // Valid ASCII input. + RunOne(U.data(), U.size()); + } else { + Printf("INFO: seed corpus: files: %zd min: %zdb max: %zdb total: %zdb" + " rss: %zdMb\n", + CorporaFiles.size(), MinSize, MaxSize, TotalSize, GetPeakRSSMb()); + if (Options.ShuffleAtStartUp) + std::shuffle(CorporaFiles.begin(), CorporaFiles.end(), MD.GetRand()); + + if (Options.PreferSmall) { + std::stable_sort(CorporaFiles.begin(), CorporaFiles.end()); + assert(CorporaFiles.front().Size <= CorporaFiles.back().Size); + } + + // Load and execute inputs one by one. + for (auto &SF : CorporaFiles) { + auto U = FileToVector(SF.File, MaxInputLen, /*ExitOnError=*/false); + assert(U.size() <= MaxInputLen); + RunOne(U.data(), U.size()); + CheckExitOnSrcPosOrItem(); + TryDetectingAMemoryLeak(U.data(), U.size(), + /*DuringInitialCorpusExecution*/ true); + } + } + + PrintStats("INITED"); + if (!Options.FocusFunction.empty()) { + Printf("INFO: %zd/%zd inputs touch the focus function\n", + Corpus.NumInputsThatTouchFocusFunction(), Corpus.size()); + if (!Options.DataFlowTrace.empty()) + Printf("INFO: %zd/%zd inputs have the Data Flow Trace\n", + Corpus.NumInputsWithDataFlowTrace(), + Corpus.NumInputsThatTouchFocusFunction()); + } + + if (Corpus.empty() && Options.MaxNumberOfRuns) { + Printf("ERROR: no interesting inputs were found. " + "Is the code instrumented for coverage? Exiting.\n"); + return 1; + } + return 0; +} + +int Fuzzer::Loop(Vector<SizedFile> &CorporaFiles) { + auto FocusFunctionOrAuto = Options.FocusFunction; + int Res = DFT.Init(Options.DataFlowTrace, &FocusFunctionOrAuto, CorporaFiles, + MD.GetRand()); + if (Res != 0) + return Res; + Res = TPC.SetFocusFunction(FocusFunctionOrAuto); + if (Res != 0) + return Res; + Res = ReadAndExecuteSeedCorpora(CorporaFiles); + if (Res != 0) + return Res; + DFT.Clear(); // No need for DFT any more. + TPC.SetPrintNewPCs(Options.PrintNewCovPcs); + TPC.SetPrintNewFuncs(Options.PrintNewCovFuncs); + system_clock::time_point LastCorpusReload = system_clock::now(); + + TmpMaxMutationLen = + Min(MaxMutationLen, Max(size_t(4), Corpus.MaxInputSize())); + + while (true) { + auto Now = system_clock::now(); + if (!Options.StopFile.empty() && + !FileToVector(Options.StopFile, 1, false).empty()) + break; + if (duration_cast<seconds>(Now - LastCorpusReload).count() >= + Options.ReloadIntervalSec) { + RereadOutputCorpus(MaxInputLen); + LastCorpusReload = system_clock::now(); + } + if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) + break; + if (TimedOut()) + break; + + // Update TmpMaxMutationLen + if (Options.LenControl) { + if (TmpMaxMutationLen < MaxMutationLen && + TotalNumberOfRuns - LastCorpusUpdateRun > + Options.LenControl * Log(TmpMaxMutationLen)) { + TmpMaxMutationLen = + Min(MaxMutationLen, TmpMaxMutationLen + Log(TmpMaxMutationLen)); + LastCorpusUpdateRun = TotalNumberOfRuns; + } + } else { + TmpMaxMutationLen = MaxMutationLen; + } + + // Perform several mutations and runs. + if (MutateAndTestOne()) + return 0; + + PurgeAllocator(); + } + + PrintStats("DONE ", "\n"); + MD.PrintRecommendedDictionary(); + return 0; +} + +void Fuzzer::MinimizeCrashLoop(const Unit &U) { + if (U.size() <= 1) + return; + while (!TimedOut() && TotalNumberOfRuns < Options.MaxNumberOfRuns) { + MD.StartMutationSequence(); + memcpy(CurrentUnitData, U.data(), U.size()); + for (int i = 0; i < Options.MutateDepth; i++) { + size_t NewSize = MD.Mutate(CurrentUnitData, U.size(), MaxMutationLen); + assert(NewSize <= MaxMutationLen); + if (!NewSize) + continue; + ExecuteCallback(CurrentUnitData, NewSize); + PrintPulseAndReportSlowInput(CurrentUnitData, NewSize); + TryDetectingAMemoryLeak(CurrentUnitData, NewSize, + /*DuringInitialCorpusExecution*/ false); + } + } +} + +} // namespace fuzzer + +extern "C" { + +ATTRIBUTE_INTERFACE size_t +LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { + assert(fuzzer::F); + return fuzzer::F->GetMD().DefaultMutate(Data, Size, MaxSize); +} + +} // extern "C" diff --git a/tools/fuzzing/libfuzzer/FuzzerMain.cpp b/tools/fuzzing/libfuzzer/FuzzerMain.cpp new file mode 100644 index 0000000000..75f2f8e75c --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerMain.cpp @@ -0,0 +1,21 @@ +//===- FuzzerMain.cpp - main() function and flags -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// main() and flags. +//===----------------------------------------------------------------------===// + +#include "FuzzerDefs.h" +#include "FuzzerPlatform.h" + +extern "C" { +// This function should be defined by the user. +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); +} // extern "C" + +ATTRIBUTE_INTERFACE int main(int argc, char **argv) { + return fuzzer::FuzzerDriver(&argc, &argv, LLVMFuzzerTestOneInput); +} diff --git a/tools/fuzzing/libfuzzer/FuzzerMerge.cpp b/tools/fuzzing/libfuzzer/FuzzerMerge.cpp new file mode 100644 index 0000000000..0a185c7325 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerMerge.cpp @@ -0,0 +1,419 @@ +//===- FuzzerMerge.cpp - merging corpora ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Merging corpora. +//===----------------------------------------------------------------------===// + +#include "FuzzerCommand.h" +#include "FuzzerMerge.h" +#include "FuzzerIO.h" +#include "FuzzerInternal.h" +#include "FuzzerTracePC.h" +#include "FuzzerUtil.h" + +#include <fstream> +#include <iterator> +#include <set> +#include <sstream> +#include <unordered_set> + +namespace fuzzer { + +bool Merger::Parse(const std::string &Str, bool ParseCoverage) { + std::istringstream SS(Str); + return Parse(SS, ParseCoverage); +} + +int Merger::ParseOrExit(std::istream &IS, bool ParseCoverage) { + if (!Parse(IS, ParseCoverage)) { + Printf("MERGE: failed to parse the control file (unexpected error)\n"); + return 1; + } + return 0; +} + +// The control file example: +// +// 3 # The number of inputs +// 1 # The number of inputs in the first corpus, <= the previous number +// file0 +// file1 +// file2 # One file name per line. +// STARTED 0 123 # FileID, file size +// FT 0 1 4 6 8 # FileID COV1 COV2 ... +// COV 0 7 8 9 # FileID COV1 COV1 +// STARTED 1 456 # If FT is missing, the input crashed while processing. +// STARTED 2 567 +// FT 2 8 9 +// COV 2 11 12 +bool Merger::Parse(std::istream &IS, bool ParseCoverage) { + LastFailure.clear(); + std::string Line; + + // Parse NumFiles. + if (!std::getline(IS, Line, '\n')) return false; + std::istringstream L1(Line); + size_t NumFiles = 0; + L1 >> NumFiles; + if (NumFiles == 0 || NumFiles > 10000000) return false; + + // Parse NumFilesInFirstCorpus. + if (!std::getline(IS, Line, '\n')) return false; + std::istringstream L2(Line); + NumFilesInFirstCorpus = NumFiles + 1; + L2 >> NumFilesInFirstCorpus; + if (NumFilesInFirstCorpus > NumFiles) return false; + + // Parse file names. + Files.resize(NumFiles); + for (size_t i = 0; i < NumFiles; i++) + if (!std::getline(IS, Files[i].Name, '\n')) + return false; + + // Parse STARTED, FT, and COV lines. + size_t ExpectedStartMarker = 0; + const size_t kInvalidStartMarker = -1; + size_t LastSeenStartMarker = kInvalidStartMarker; + Vector<uint32_t> TmpFeatures; + Set<uint32_t> PCs; + while (std::getline(IS, Line, '\n')) { + std::istringstream ISS1(Line); + std::string Marker; + size_t N; + ISS1 >> Marker; + ISS1 >> N; + if (Marker == "STARTED") { + // STARTED FILE_ID FILE_SIZE + if (ExpectedStartMarker != N) + return false; + ISS1 >> Files[ExpectedStartMarker].Size; + LastSeenStartMarker = ExpectedStartMarker; + assert(ExpectedStartMarker < Files.size()); + ExpectedStartMarker++; + } else if (Marker == "FT") { + // FT FILE_ID COV1 COV2 COV3 ... + size_t CurrentFileIdx = N; + if (CurrentFileIdx != LastSeenStartMarker) + return false; + LastSeenStartMarker = kInvalidStartMarker; + if (ParseCoverage) { + TmpFeatures.clear(); // use a vector from outer scope to avoid resizes. + while (ISS1 >> N) + TmpFeatures.push_back(N); + std::sort(TmpFeatures.begin(), TmpFeatures.end()); + Files[CurrentFileIdx].Features = TmpFeatures; + } + } else if (Marker == "COV") { + size_t CurrentFileIdx = N; + if (ParseCoverage) + while (ISS1 >> N) + if (PCs.insert(N).second) + Files[CurrentFileIdx].Cov.push_back(N); + } else { + return false; + } + } + if (LastSeenStartMarker != kInvalidStartMarker) + LastFailure = Files[LastSeenStartMarker].Name; + + FirstNotProcessedFile = ExpectedStartMarker; + return true; +} + +size_t Merger::ApproximateMemoryConsumption() const { + size_t Res = 0; + for (const auto &F: Files) + Res += sizeof(F) + F.Features.size() * sizeof(F.Features[0]); + return Res; +} + +// Decides which files need to be merged (add those to NewFiles). +// Returns the number of new features added. +size_t Merger::Merge(const Set<uint32_t> &InitialFeatures, + Set<uint32_t> *NewFeatures, + const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov, + Vector<std::string> *NewFiles) { + NewFiles->clear(); + assert(NumFilesInFirstCorpus <= Files.size()); + Set<uint32_t> AllFeatures = InitialFeatures; + + // What features are in the initial corpus? + for (size_t i = 0; i < NumFilesInFirstCorpus; i++) { + auto &Cur = Files[i].Features; + AllFeatures.insert(Cur.begin(), Cur.end()); + } + // Remove all features that we already know from all other inputs. + for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { + auto &Cur = Files[i].Features; + Vector<uint32_t> Tmp; + std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(), + AllFeatures.end(), std::inserter(Tmp, Tmp.begin())); + Cur.swap(Tmp); + } + + // Sort. Give preference to + // * smaller files + // * files with more features. + std::sort(Files.begin() + NumFilesInFirstCorpus, Files.end(), + [&](const MergeFileInfo &a, const MergeFileInfo &b) -> bool { + if (a.Size != b.Size) + return a.Size < b.Size; + return a.Features.size() > b.Features.size(); + }); + + // One greedy pass: add the file's features to AllFeatures. + // If new features were added, add this file to NewFiles. + for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { + auto &Cur = Files[i].Features; + // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(), + // Files[i].Size, Cur.size()); + bool FoundNewFeatures = false; + for (auto Fe: Cur) { + if (AllFeatures.insert(Fe).second) { + FoundNewFeatures = true; + NewFeatures->insert(Fe); + } + } + if (FoundNewFeatures) + NewFiles->push_back(Files[i].Name); + for (auto Cov : Files[i].Cov) + if (InitialCov.find(Cov) == InitialCov.end()) + NewCov->insert(Cov); + } + return NewFeatures->size(); +} + +Set<uint32_t> Merger::AllFeatures() const { + Set<uint32_t> S; + for (auto &File : Files) + S.insert(File.Features.begin(), File.Features.end()); + return S; +} + +// Inner process. May crash if the target crashes. +int Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { + Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str()); + Merger M; + std::ifstream IF(CFPath); + int Res = M.ParseOrExit(IF, false); + if (Res != 0) + return Res; + IF.close(); + if (!M.LastFailure.empty()) + Printf("MERGE-INNER: '%s' caused a failure at the previous merge step\n", + M.LastFailure.c_str()); + + Printf("MERGE-INNER: %zd total files;" + " %zd processed earlier; will process %zd files now\n", + M.Files.size(), M.FirstNotProcessedFile, + M.Files.size() - M.FirstNotProcessedFile); + + std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app); + Set<size_t> AllFeatures; + auto PrintStatsWrapper = [this, &AllFeatures](const char* Where) { + this->PrintStats(Where, "\n", 0, AllFeatures.size()); + }; + Set<const TracePC::PCTableEntry *> AllPCs; + for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) { + if (Fuzzer::MaybeExitGracefully()) + return 0; + auto U = FileToVector(M.Files[i].Name); + if (U.size() > MaxInputLen) { + U.resize(MaxInputLen); + U.shrink_to_fit(); + } + + // Write the pre-run marker. + OF << "STARTED " << i << " " << U.size() << "\n"; + OF.flush(); // Flush is important since Command::Execute may crash. + // Run. + TPC.ResetMaps(); + if (ExecuteCallback(U.data(), U.size()) > 0) { + continue; + } + // Collect coverage. We are iterating over the files in this order: + // * First, files in the initial corpus ordered by size, smallest first. + // * Then, all other files, smallest first. + // So it makes no sense to record all features for all files, instead we + // only record features that were not seen before. + Set<size_t> UniqFeatures; + TPC.CollectFeatures([&](size_t Feature) { + if (AllFeatures.insert(Feature).second) + UniqFeatures.insert(Feature); + }); + TPC.UpdateObservedPCs(); + // Show stats. + if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1))) + PrintStatsWrapper("pulse "); + if (TotalNumberOfRuns == M.NumFilesInFirstCorpus) + PrintStatsWrapper("LOADED"); + // Write the post-run marker and the coverage. + OF << "FT " << i; + for (size_t F : UniqFeatures) + OF << " " << F; + OF << "\n"; + OF << "COV " << i; + TPC.ForEachObservedPC([&](const TracePC::PCTableEntry *TE) { + if (AllPCs.insert(TE).second) + OF << " " << TPC.PCTableEntryIdx(TE); + }); + OF << "\n"; + OF.flush(); + } + PrintStatsWrapper("DONE "); + return 0; +} + +static int WriteNewControlFile(const std::string &CFPath, + const Vector<SizedFile> &OldCorpus, + const Vector<SizedFile> &NewCorpus, + const Vector<MergeFileInfo> &KnownFiles, + size_t &NumFiles) { + std::unordered_set<std::string> FilesToSkip; + for (auto &SF: KnownFiles) + FilesToSkip.insert(SF.Name); + + Vector<std::string> FilesToUse; + auto MaybeUseFile = [=, &FilesToUse](std::string Name) { + if (FilesToSkip.find(Name) == FilesToSkip.end()) + FilesToUse.push_back(Name); + }; + for (auto &SF: OldCorpus) + MaybeUseFile(SF.File); + auto FilesToUseFromOldCorpus = FilesToUse.size(); + for (auto &SF: NewCorpus) + MaybeUseFile(SF.File); + + RemoveFile(CFPath); + std::ofstream ControlFile(CFPath); + ControlFile << FilesToUse.size() << "\n"; + ControlFile << FilesToUseFromOldCorpus << "\n"; + for (auto &FN: FilesToUse) + ControlFile << FN << "\n"; + + if (!ControlFile) { + Printf("MERGE-OUTER: failed to write to the control file: %s\n", + CFPath.c_str()); + return 1; + } + + NumFiles = FilesToUse.size(); + return 0; +} + +// Outer process. Does not call the target code and thus should not fail. +int CrashResistantMerge(const Vector<std::string> &Args, + const Vector<SizedFile> &OldCorpus, + const Vector<SizedFile> &NewCorpus, + Vector<std::string> *NewFiles, + const Set<uint32_t> &InitialFeatures, + Set<uint32_t> *NewFeatures, + const Set<uint32_t> &InitialCov, + Set<uint32_t> *NewCov, + const std::string &CFPath, + bool V /*Verbose*/) { + if (NewCorpus.empty() && OldCorpus.empty()) return 0; // Nothing to merge. + size_t NumAttempts = 0; + int Res; + Vector<MergeFileInfo> KnownFiles; + if (FileSize(CFPath)) { + VPrintf(V, "MERGE-OUTER: non-empty control file provided: '%s'\n", + CFPath.c_str()); + Merger M; + std::ifstream IF(CFPath); + if (M.Parse(IF, /*ParseCoverage=*/true)) { + VPrintf(V, "MERGE-OUTER: control file ok, %zd files total," + " first not processed file %zd\n", + M.Files.size(), M.FirstNotProcessedFile); + if (!M.LastFailure.empty()) + VPrintf(V, "MERGE-OUTER: '%s' will be skipped as unlucky " + "(merge has stumbled on it the last time)\n", + M.LastFailure.c_str()); + if (M.FirstNotProcessedFile >= M.Files.size()) { + // Merge has already been completed with the given merge control file. + if (M.Files.size() == OldCorpus.size() + NewCorpus.size()) { + VPrintf( + V, + "MERGE-OUTER: nothing to do, merge has been completed before\n"); + Fuzzer::GracefullyExit(); + return 0; + } + + // Number of input files likely changed, start merge from scratch, but + // reuse coverage information from the given merge control file. + VPrintf( + V, + "MERGE-OUTER: starting merge from scratch, but reusing coverage " + "information from the given control file\n"); + KnownFiles = M.Files; + } else { + // There is a merge in progress, continue. + NumAttempts = M.Files.size() - M.FirstNotProcessedFile; + } + } else { + VPrintf(V, "MERGE-OUTER: bad control file, will overwrite it\n"); + } + } + + if (!NumAttempts) { + // The supplied control file is empty or bad, create a fresh one. + VPrintf(V, "MERGE-OUTER: " + "%zd files, %zd in the initial corpus, %zd processed earlier\n", + OldCorpus.size() + NewCorpus.size(), OldCorpus.size(), + KnownFiles.size()); + Res = WriteNewControlFile(CFPath, OldCorpus, NewCorpus, KnownFiles, NumAttempts); + if (Res != 0) + return Res; + } + + // Execute the inner process until it passes. + // Every inner process should execute at least one input. + Command BaseCmd(Args); + BaseCmd.removeFlag("merge"); + BaseCmd.removeFlag("fork"); + BaseCmd.removeFlag("collect_data_flow"); + for (size_t Attempt = 1; Attempt <= NumAttempts; Attempt++) { + if (Fuzzer::MaybeExitGracefully()) + return 0; + VPrintf(V, "MERGE-OUTER: attempt %zd\n", Attempt); + Command Cmd(BaseCmd); + Cmd.addFlag("merge_control_file", CFPath); + Cmd.addFlag("merge_inner", "1"); + if (!V) { + Cmd.setOutputFile(getDevNull()); + Cmd.combineOutAndErr(); + } + auto ExitCode = ExecuteCommand(Cmd); + if (!ExitCode) { + VPrintf(V, "MERGE-OUTER: succesfull in %zd attempt(s)\n", Attempt); + break; + } + } + // Read the control file and do the merge. + Merger M; + std::ifstream IF(CFPath); + IF.seekg(0, IF.end); + VPrintf(V, "MERGE-OUTER: the control file has %zd bytes\n", + (size_t)IF.tellg()); + IF.seekg(0, IF.beg); + Res = M.ParseOrExit(IF, true); + if (Res != 0) + return Res; + IF.close(); + VPrintf(V, + "MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n", + M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb()); + + M.Files.insert(M.Files.end(), KnownFiles.begin(), KnownFiles.end()); + M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles); + VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; " + "%zd new coverage edges\n", + NewFiles->size(), NewFeatures->size(), NewCov->size()); + return 0; +} + +} // namespace fuzzer diff --git a/tools/fuzzing/libfuzzer/FuzzerMerge.h b/tools/fuzzing/libfuzzer/FuzzerMerge.h new file mode 100644 index 0000000000..6dc1c4c45a --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerMerge.h @@ -0,0 +1,87 @@ +//===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Merging Corpora. +// +// The task: +// Take the existing corpus (possibly empty) and merge new inputs into +// it so that only inputs with new coverage ('features') are added. +// The process should tolerate the crashes, OOMs, leaks, etc. +// +// Algorithm: +// The outer process collects the set of files and writes their names +// into a temporary "control" file, then repeatedly launches the inner +// process until all inputs are processed. +// The outer process does not actually execute the target code. +// +// The inner process reads the control file and sees a) list of all the inputs +// and b) the last processed input. Then it starts processing the inputs one +// by one. Before processing every input it writes one line to control file: +// STARTED INPUT_ID INPUT_SIZE +// After processing an input it writes the following lines: +// FT INPUT_ID Feature1 Feature2 Feature3 ... +// COV INPUT_ID Coverage1 Coverage2 Coverage3 ... +// If a crash happens while processing an input the last line in the control +// file will be "STARTED INPUT_ID" and so the next process will know +// where to resume. +// +// Once all inputs are processed by the inner process(es) the outer process +// reads the control files and does the merge based entirely on the contents +// of control file. +// It uses a single pass greedy algorithm choosing first the smallest inputs +// within the same size the inputs that have more new features. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_MERGE_H +#define LLVM_FUZZER_MERGE_H + +#include "FuzzerDefs.h" + +#include <istream> +#include <ostream> +#include <set> +#include <vector> + +namespace fuzzer { + +struct MergeFileInfo { + std::string Name; + size_t Size = 0; + Vector<uint32_t> Features, Cov; +}; + +struct Merger { + Vector<MergeFileInfo> Files; + size_t NumFilesInFirstCorpus = 0; + size_t FirstNotProcessedFile = 0; + std::string LastFailure; + + bool Parse(std::istream &IS, bool ParseCoverage); + bool Parse(const std::string &Str, bool ParseCoverage); + int ParseOrExit(std::istream &IS, bool ParseCoverage); + size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures, + const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov, + Vector<std::string> *NewFiles); + size_t ApproximateMemoryConsumption() const; + Set<uint32_t> AllFeatures() const; +}; + +int CrashResistantMerge(const Vector<std::string> &Args, + const Vector<SizedFile> &OldCorpus, + const Vector<SizedFile> &NewCorpus, + Vector<std::string> *NewFiles, + const Set<uint32_t> &InitialFeatures, + Set<uint32_t> *NewFeatures, + const Set<uint32_t> &InitialCov, + Set<uint32_t> *NewCov, + const std::string &CFPath, + bool Verbose); + +} // namespace fuzzer + +#endif // LLVM_FUZZER_MERGE_H diff --git a/tools/fuzzing/libfuzzer/FuzzerMutate.cpp b/tools/fuzzing/libfuzzer/FuzzerMutate.cpp new file mode 100644 index 0000000000..29541eac5d --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerMutate.cpp @@ -0,0 +1,562 @@ +//===- FuzzerMutate.cpp - Mutate a test input -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Mutate a test input. +//===----------------------------------------------------------------------===// + +#include "FuzzerDefs.h" +#include "FuzzerExtFunctions.h" +#include "FuzzerIO.h" +#include "FuzzerMutate.h" +#include "FuzzerOptions.h" +#include "FuzzerTracePC.h" + +namespace fuzzer { + +const size_t Dictionary::kMaxDictSize; + +static void PrintASCII(const Word &W, const char *PrintAfter) { + PrintASCII(W.data(), W.size(), PrintAfter); +} + +MutationDispatcher::MutationDispatcher(Random &Rand, + const FuzzingOptions &Options) + : Rand(Rand), Options(Options) { + DefaultMutators.insert( + DefaultMutators.begin(), + { + {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes"}, + {&MutationDispatcher::Mutate_InsertByte, "InsertByte"}, + {&MutationDispatcher::Mutate_InsertRepeatedBytes, + "InsertRepeatedBytes"}, + {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"}, + {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"}, + {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"}, + {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"}, + {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt"}, + {&MutationDispatcher::Mutate_CopyPart, "CopyPart"}, + {&MutationDispatcher::Mutate_CrossOver, "CrossOver"}, + {&MutationDispatcher::Mutate_AddWordFromManualDictionary, + "ManualDict"}, + {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary, + "PersAutoDict"}, + }); + if(Options.UseCmp) + DefaultMutators.push_back( + {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP"}); + + if (EF->LLVMFuzzerCustomMutator) + Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom"}); + else + Mutators = DefaultMutators; + + if (EF->LLVMFuzzerCustomCrossOver) + Mutators.push_back( + {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver"}); +} + +static char RandCh(Random &Rand) { + if (Rand.RandBool()) return Rand(256); + const char Special[] = "!*'();:@&=+$,/?%#[]012Az-`~.\xff\x00"; + return Special[Rand(sizeof(Special) - 1)]; +} + +size_t MutationDispatcher::Mutate_Custom(uint8_t *Data, size_t Size, + size_t MaxSize) { + return EF->LLVMFuzzerCustomMutator(Data, Size, MaxSize, Rand.Rand()); +} + +size_t MutationDispatcher::Mutate_CustomCrossOver(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size == 0) + return 0; + if (!CrossOverWith) return 0; + const Unit &Other = *CrossOverWith; + if (Other.empty()) + return 0; + CustomCrossOverInPlaceHere.resize(MaxSize); + auto &U = CustomCrossOverInPlaceHere; + size_t NewSize = EF->LLVMFuzzerCustomCrossOver( + Data, Size, Other.data(), Other.size(), U.data(), U.size(), Rand.Rand()); + if (!NewSize) + return 0; + assert(NewSize <= MaxSize && "CustomCrossOver returned overisized unit"); + memcpy(Data, U.data(), NewSize); + return NewSize; +} + +size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize || Size == 0) return 0; + size_t ShuffleAmount = + Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size. + size_t ShuffleStart = Rand(Size - ShuffleAmount); + assert(ShuffleStart + ShuffleAmount <= Size); + std::shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_EraseBytes(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size <= 1) return 0; + size_t N = Rand(Size / 2) + 1; + assert(N < Size); + size_t Idx = Rand(Size - N + 1); + // Erase Data[Idx:Idx+N]. + memmove(Data + Idx, Data + Idx + N, Size - Idx - N); + // Printf("Erase: %zd %zd => %zd; Idx %zd\n", N, Size, Size - N, Idx); + return Size - N; +} + +size_t MutationDispatcher::Mutate_InsertByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size >= MaxSize) return 0; + size_t Idx = Rand(Size + 1); + // Insert new value at Data[Idx]. + memmove(Data + Idx + 1, Data + Idx, Size - Idx); + Data[Idx] = RandCh(Rand); + return Size + 1; +} + +size_t MutationDispatcher::Mutate_InsertRepeatedBytes(uint8_t *Data, + size_t Size, + size_t MaxSize) { + const size_t kMinBytesToInsert = 3; + if (Size + kMinBytesToInsert >= MaxSize) return 0; + size_t MaxBytesToInsert = std::min(MaxSize - Size, (size_t)128); + size_t N = Rand(MaxBytesToInsert - kMinBytesToInsert + 1) + kMinBytesToInsert; + assert(Size + N <= MaxSize && N); + size_t Idx = Rand(Size + 1); + // Insert new values at Data[Idx]. + memmove(Data + Idx + N, Data + Idx, Size - Idx); + // Give preference to 0x00 and 0xff. + uint8_t Byte = Rand.RandBool() ? Rand(256) : (Rand.RandBool() ? 0 : 255); + for (size_t i = 0; i < N; i++) + Data[Idx + i] = Byte; + return Size + N; +} + +size_t MutationDispatcher::Mutate_ChangeByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) return 0; + size_t Idx = Rand(Size); + Data[Idx] = RandCh(Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) return 0; + size_t Idx = Rand(Size); + Data[Idx] ^= 1 << Rand(8); + return Size; +} + +size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data, + size_t Size, + size_t MaxSize) { + return AddWordFromDictionary(ManualDictionary, Data, Size, MaxSize); +} + +size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size, + size_t MaxSize, + DictionaryEntry &DE) { + const Word &W = DE.GetW(); + bool UsePositionHint = DE.HasPositionHint() && + DE.GetPositionHint() + W.size() < Size && + Rand.RandBool(); + if (Rand.RandBool()) { // Insert W. + if (Size + W.size() > MaxSize) return 0; + size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1); + memmove(Data + Idx + W.size(), Data + Idx, Size - Idx); + memcpy(Data + Idx, W.data(), W.size()); + Size += W.size(); + } else { // Overwrite some bytes with W. + if (W.size() > Size) return 0; + size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size - W.size()); + memcpy(Data + Idx, W.data(), W.size()); + } + return Size; +} + +// Somewhere in the past we have observed a comparison instructions +// with arguments Arg1 Arg2. This function tries to guess a dictionary +// entry that will satisfy that comparison. +// It first tries to find one of the arguments (possibly swapped) in the +// input and if it succeeds it creates a DE with a position hint. +// Otherwise it creates a DE with one of the arguments w/o a position hint. +DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( + const void *Arg1, const void *Arg2, + const void *Arg1Mutation, const void *Arg2Mutation, + size_t ArgSize, const uint8_t *Data, + size_t Size) { + bool HandleFirst = Rand.RandBool(); + const void *ExistingBytes, *DesiredBytes; + Word W; + const uint8_t *End = Data + Size; + for (int Arg = 0; Arg < 2; Arg++) { + ExistingBytes = HandleFirst ? Arg1 : Arg2; + DesiredBytes = HandleFirst ? Arg2Mutation : Arg1Mutation; + HandleFirst = !HandleFirst; + W.Set(reinterpret_cast<const uint8_t*>(DesiredBytes), ArgSize); + const size_t kMaxNumPositions = 8; + size_t Positions[kMaxNumPositions]; + size_t NumPositions = 0; + for (const uint8_t *Cur = Data; + Cur < End && NumPositions < kMaxNumPositions; Cur++) { + Cur = + (const uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize); + if (!Cur) break; + Positions[NumPositions++] = Cur - Data; + } + if (!NumPositions) continue; + return DictionaryEntry(W, Positions[Rand(NumPositions)]); + } + DictionaryEntry DE(W); + return DE; +} + + +template <class T> +DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( + T Arg1, T Arg2, const uint8_t *Data, size_t Size) { + if (Rand.RandBool()) Arg1 = Bswap(Arg1); + if (Rand.RandBool()) Arg2 = Bswap(Arg2); + T Arg1Mutation = Arg1 + Rand(-1, 1); + T Arg2Mutation = Arg2 + Rand(-1, 1); + return MakeDictionaryEntryFromCMP(&Arg1, &Arg2, &Arg1Mutation, &Arg2Mutation, + sizeof(Arg1), Data, Size); +} + +DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( + const Word &Arg1, const Word &Arg2, const uint8_t *Data, size_t Size) { + return MakeDictionaryEntryFromCMP(Arg1.data(), Arg2.data(), Arg1.data(), + Arg2.data(), Arg1.size(), Data, Size); +} + +size_t MutationDispatcher::Mutate_AddWordFromTORC( + uint8_t *Data, size_t Size, size_t MaxSize) { + Word W; + DictionaryEntry DE; + switch (Rand(4)) { + case 0: { + auto X = TPC.TORC8.Get(Rand.Rand()); + DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); + } break; + case 1: { + auto X = TPC.TORC4.Get(Rand.Rand()); + if ((X.A >> 16) == 0 && (X.B >> 16) == 0 && Rand.RandBool()) + DE = MakeDictionaryEntryFromCMP((uint16_t)X.A, (uint16_t)X.B, Data, Size); + else + DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); + } break; + case 2: { + auto X = TPC.TORCW.Get(Rand.Rand()); + DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); + } break; + case 3: if (Options.UseMemmem) { + auto X = TPC.MMT.Get(Rand.Rand()); + DE = DictionaryEntry(X); + } break; + default: + assert(0); + } + if (!DE.GetW().size()) return 0; + Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); + if (!Size) return 0; + DictionaryEntry &DERef = + CmpDictionaryEntriesDeque[CmpDictionaryEntriesDequeIdx++ % + kCmpDictionaryEntriesDequeSize]; + DERef = DE; + CurrentDictionaryEntrySequence.push_back(&DERef); + return Size; +} + +size_t MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary( + uint8_t *Data, size_t Size, size_t MaxSize) { + return AddWordFromDictionary(PersistentAutoDictionary, Data, Size, MaxSize); +} + +size_t MutationDispatcher::AddWordFromDictionary(Dictionary &D, uint8_t *Data, + size_t Size, size_t MaxSize) { + if (Size > MaxSize) return 0; + if (D.empty()) return 0; + DictionaryEntry &DE = D[Rand(D.size())]; + Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); + if (!Size) return 0; + DE.IncUseCount(); + CurrentDictionaryEntrySequence.push_back(&DE); + return Size; +} + +// Overwrites part of To[0,ToSize) with a part of From[0,FromSize). +// Returns ToSize. +size_t MutationDispatcher::CopyPartOf(const uint8_t *From, size_t FromSize, + uint8_t *To, size_t ToSize) { + // Copy From[FromBeg, FromBeg + CopySize) into To[ToBeg, ToBeg + CopySize). + size_t ToBeg = Rand(ToSize); + size_t CopySize = Rand(ToSize - ToBeg) + 1; + assert(ToBeg + CopySize <= ToSize); + CopySize = std::min(CopySize, FromSize); + size_t FromBeg = Rand(FromSize - CopySize + 1); + assert(FromBeg + CopySize <= FromSize); + memmove(To + ToBeg, From + FromBeg, CopySize); + return ToSize; +} + +// Inserts part of From[0,ToSize) into To. +// Returns new size of To on success or 0 on failure. +size_t MutationDispatcher::InsertPartOf(const uint8_t *From, size_t FromSize, + uint8_t *To, size_t ToSize, + size_t MaxToSize) { + if (ToSize >= MaxToSize) return 0; + size_t AvailableSpace = MaxToSize - ToSize; + size_t MaxCopySize = std::min(AvailableSpace, FromSize); + size_t CopySize = Rand(MaxCopySize) + 1; + size_t FromBeg = Rand(FromSize - CopySize + 1); + assert(FromBeg + CopySize <= FromSize); + size_t ToInsertPos = Rand(ToSize + 1); + assert(ToInsertPos + CopySize <= MaxToSize); + size_t TailSize = ToSize - ToInsertPos; + if (To == From) { + MutateInPlaceHere.resize(MaxToSize); + memcpy(MutateInPlaceHere.data(), From + FromBeg, CopySize); + memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); + memmove(To + ToInsertPos, MutateInPlaceHere.data(), CopySize); + } else { + memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); + memmove(To + ToInsertPos, From + FromBeg, CopySize); + } + return ToSize + CopySize; +} + +size_t MutationDispatcher::Mutate_CopyPart(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize || Size == 0) return 0; + // If Size == MaxSize, `InsertPartOf(...)` will + // fail so there's no point using it in this case. + if (Size == MaxSize || Rand.RandBool()) + return CopyPartOf(Data, Size, Data, Size); + else + return InsertPartOf(Data, Size, Data, Size, MaxSize); +} + +size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) return 0; + size_t B = Rand(Size); + while (B < Size && !isdigit(Data[B])) B++; + if (B == Size) return 0; + size_t E = B; + while (E < Size && isdigit(Data[E])) E++; + assert(B < E); + // now we have digits in [B, E). + // strtol and friends don't accept non-zero-teminated data, parse it manually. + uint64_t Val = Data[B] - '0'; + for (size_t i = B + 1; i < E; i++) + Val = Val * 10 + Data[i] - '0'; + + // Mutate the integer value. + switch(Rand(5)) { + case 0: Val++; break; + case 1: Val--; break; + case 2: Val /= 2; break; + case 3: Val *= 2; break; + case 4: Val = Rand(Val * Val); break; + default: assert(0); + } + // Just replace the bytes with the new ones, don't bother moving bytes. + for (size_t i = B; i < E; i++) { + size_t Idx = E + B - i - 1; + assert(Idx >= B && Idx < E); + Data[Idx] = (Val % 10) + '0'; + Val /= 10; + } + return Size; +} + +template<class T> +size_t ChangeBinaryInteger(uint8_t *Data, size_t Size, Random &Rand) { + if (Size < sizeof(T)) return 0; + size_t Off = Rand(Size - sizeof(T) + 1); + assert(Off + sizeof(T) <= Size); + T Val; + if (Off < 64 && !Rand(4)) { + Val = Size; + if (Rand.RandBool()) + Val = Bswap(Val); + } else { + memcpy(&Val, Data + Off, sizeof(Val)); + T Add = Rand(21); + Add -= 10; + if (Rand.RandBool()) + Val = Bswap(T(Bswap(Val) + Add)); // Add assuming different endiannes. + else + Val = Val + Add; // Add assuming current endiannes. + if (Add == 0 || Rand.RandBool()) // Maybe negate. + Val = -Val; + } + memcpy(Data + Off, &Val, sizeof(Val)); + return Size; +} + +size_t MutationDispatcher::Mutate_ChangeBinaryInteger(uint8_t *Data, + size_t Size, + size_t MaxSize) { + if (Size > MaxSize) return 0; + switch (Rand(4)) { + case 3: return ChangeBinaryInteger<uint64_t>(Data, Size, Rand); + case 2: return ChangeBinaryInteger<uint32_t>(Data, Size, Rand); + case 1: return ChangeBinaryInteger<uint16_t>(Data, Size, Rand); + case 0: return ChangeBinaryInteger<uint8_t>(Data, Size, Rand); + default: assert(0); + } + return 0; +} + +size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) return 0; + if (Size == 0) return 0; + if (!CrossOverWith) return 0; + const Unit &O = *CrossOverWith; + if (O.empty()) return 0; + MutateInPlaceHere.resize(MaxSize); + auto &U = MutateInPlaceHere; + size_t NewSize = 0; + switch(Rand(3)) { + case 0: + NewSize = CrossOver(Data, Size, O.data(), O.size(), U.data(), U.size()); + break; + case 1: + NewSize = InsertPartOf(O.data(), O.size(), U.data(), U.size(), MaxSize); + if (!NewSize) + NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size()); + break; + case 2: + NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size()); + break; + default: assert(0); + } + assert(NewSize > 0 && "CrossOver returned empty unit"); + assert(NewSize <= MaxSize && "CrossOver returned overisized unit"); + memcpy(Data, U.data(), NewSize); + return NewSize; +} + +void MutationDispatcher::StartMutationSequence() { + CurrentMutatorSequence.clear(); + CurrentDictionaryEntrySequence.clear(); +} + +// Copy successful dictionary entries to PersistentAutoDictionary. +void MutationDispatcher::RecordSuccessfulMutationSequence() { + for (auto DE : CurrentDictionaryEntrySequence) { + // PersistentAutoDictionary.AddWithSuccessCountOne(DE); + DE->IncSuccessCount(); + assert(DE->GetW().size()); + // Linear search is fine here as this happens seldom. + if (!PersistentAutoDictionary.ContainsWord(DE->GetW())) + PersistentAutoDictionary.push_back({DE->GetW(), 1}); + } +} + +void MutationDispatcher::PrintRecommendedDictionary() { + Vector<DictionaryEntry> V; + for (auto &DE : PersistentAutoDictionary) + if (!ManualDictionary.ContainsWord(DE.GetW())) + V.push_back(DE); + if (V.empty()) return; + Printf("###### Recommended dictionary. ######\n"); + for (auto &DE: V) { + assert(DE.GetW().size()); + Printf("\""); + PrintASCII(DE.GetW(), "\""); + Printf(" # Uses: %zd\n", DE.GetUseCount()); + } + Printf("###### End of recommended dictionary. ######\n"); +} + +void MutationDispatcher::PrintMutationSequence() { + Printf("MS: %zd ", CurrentMutatorSequence.size()); + for (auto M : CurrentMutatorSequence) + Printf("%s-", M.Name); + if (!CurrentDictionaryEntrySequence.empty()) { + Printf(" DE: "); + for (auto DE : CurrentDictionaryEntrySequence) { + Printf("\""); + PrintASCII(DE->GetW(), "\"-"); + } + } +} + +size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { + return MutateImpl(Data, Size, MaxSize, Mutators); +} + +size_t MutationDispatcher::DefaultMutate(uint8_t *Data, size_t Size, + size_t MaxSize) { + return MutateImpl(Data, Size, MaxSize, DefaultMutators); +} + +// Mutates Data in place, returns new size. +size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size, + size_t MaxSize, + Vector<Mutator> &Mutators) { + assert(MaxSize > 0); + // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize), + // in which case they will return 0. + // Try several times before returning un-mutated data. + for (int Iter = 0; Iter < 100; Iter++) { + auto M = Mutators[Rand(Mutators.size())]; + size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize); + if (NewSize && NewSize <= MaxSize) { + if (Options.OnlyASCII) + ToASCII(Data, NewSize); + CurrentMutatorSequence.push_back(M); + return NewSize; + } + } + *Data = ' '; + return 1; // Fallback, should not happen frequently. +} + +// Mask represents the set of Data bytes that are worth mutating. +size_t MutationDispatcher::MutateWithMask(uint8_t *Data, size_t Size, + size_t MaxSize, + const Vector<uint8_t> &Mask) { + size_t MaskedSize = std::min(Size, Mask.size()); + // * Copy the worthy bytes into a temporary array T + // * Mutate T + // * Copy T back. + // This is totally unoptimized. + auto &T = MutateWithMaskTemp; + if (T.size() < Size) + T.resize(Size); + size_t OneBits = 0; + for (size_t I = 0; I < MaskedSize; I++) + if (Mask[I]) + T[OneBits++] = Data[I]; + + if (!OneBits) return 0; + assert(!T.empty()); + size_t NewSize = Mutate(T.data(), OneBits, OneBits); + assert(NewSize <= OneBits); + (void)NewSize; + // Even if NewSize < OneBits we still use all OneBits bytes. + for (size_t I = 0, J = 0; I < MaskedSize; I++) + if (Mask[I]) + Data[I] = T[J++]; + return Size; +} + +void MutationDispatcher::AddWordToManualDictionary(const Word &W) { + ManualDictionary.push_back( + {W, std::numeric_limits<size_t>::max()}); +} + +} // namespace fuzzer diff --git a/tools/fuzzing/libfuzzer/FuzzerMutate.h b/tools/fuzzing/libfuzzer/FuzzerMutate.h new file mode 100644 index 0000000000..6cbce80276 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerMutate.h @@ -0,0 +1,156 @@ +//===- FuzzerMutate.h - Internal header for the Fuzzer ----------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// fuzzer::MutationDispatcher +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_MUTATE_H +#define LLVM_FUZZER_MUTATE_H + +#include "FuzzerDefs.h" +#include "FuzzerDictionary.h" +#include "FuzzerOptions.h" +#include "FuzzerRandom.h" + +namespace fuzzer { + +class MutationDispatcher { +public: + MutationDispatcher(Random &Rand, const FuzzingOptions &Options); + ~MutationDispatcher() {} + /// Indicate that we are about to start a new sequence of mutations. + void StartMutationSequence(); + /// Print the current sequence of mutations. + void PrintMutationSequence(); + /// Indicate that the current sequence of mutations was successful. + void RecordSuccessfulMutationSequence(); + /// Mutates data by invoking user-provided mutator. + size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by invoking user-provided crossover. + size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by shuffling bytes. + size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by erasing bytes. + size_t Mutate_EraseBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by inserting a byte. + size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by inserting several repeated bytes. + size_t Mutate_InsertRepeatedBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by chanding one byte. + size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by chanding one bit. + size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by copying/inserting a part of data into a different place. + size_t Mutate_CopyPart(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Mutates data by adding a word from the manual dictionary. + size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); + + /// Mutates data by adding a word from the TORC. + size_t Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Mutates data by adding a word from the persistent automatic dictionary. + size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); + + /// Tries to find an ASCII integer in Data, changes it to another ASCII int. + size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); + /// Change a 1-, 2-, 4-, or 8-byte integer in interesting ways. + size_t Mutate_ChangeBinaryInteger(uint8_t *Data, size_t Size, size_t MaxSize); + + /// CrossOver Data with CrossOverWith. + size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Applies one of the configured mutations. + /// Returns the new size of data which could be up to MaxSize. + size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Applies one of the configured mutations to the bytes of Data + /// that have '1' in Mask. + /// Mask.size() should be >= Size. + size_t MutateWithMask(uint8_t *Data, size_t Size, size_t MaxSize, + const Vector<uint8_t> &Mask); + + /// Applies one of the default mutations. Provided as a service + /// to mutation authors. + size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Creates a cross-over of two pieces of Data, returns its size. + size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, + size_t Size2, uint8_t *Out, size_t MaxOutSize); + + void AddWordToManualDictionary(const Word &W); + + void PrintRecommendedDictionary(); + + void SetCrossOverWith(const Unit *U) { CrossOverWith = U; } + + Random &GetRand() { return Rand; } + + private: + struct Mutator { + size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); + const char *Name; + }; + + size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, + size_t MaxSize); + size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, + Vector<Mutator> &Mutators); + + size_t InsertPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, + size_t ToSize, size_t MaxToSize); + size_t CopyPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, + size_t ToSize); + size_t ApplyDictionaryEntry(uint8_t *Data, size_t Size, size_t MaxSize, + DictionaryEntry &DE); + + template <class T> + DictionaryEntry MakeDictionaryEntryFromCMP(T Arg1, T Arg2, + const uint8_t *Data, size_t Size); + DictionaryEntry MakeDictionaryEntryFromCMP(const Word &Arg1, const Word &Arg2, + const uint8_t *Data, size_t Size); + DictionaryEntry MakeDictionaryEntryFromCMP(const void *Arg1, const void *Arg2, + const void *Arg1Mutation, + const void *Arg2Mutation, + size_t ArgSize, + const uint8_t *Data, size_t Size); + + Random &Rand; + const FuzzingOptions Options; + + // Dictionary provided by the user via -dict=DICT_FILE. + Dictionary ManualDictionary; + // Temporary dictionary modified by the fuzzer itself, + // recreated periodically. + Dictionary TempAutoDictionary; + // Persistent dictionary modified by the fuzzer, consists of + // entries that led to successful discoveries in the past mutations. + Dictionary PersistentAutoDictionary; + + Vector<DictionaryEntry *> CurrentDictionaryEntrySequence; + + static const size_t kCmpDictionaryEntriesDequeSize = 16; + DictionaryEntry CmpDictionaryEntriesDeque[kCmpDictionaryEntriesDequeSize]; + size_t CmpDictionaryEntriesDequeIdx = 0; + + const Unit *CrossOverWith = nullptr; + Vector<uint8_t> MutateInPlaceHere; + Vector<uint8_t> MutateWithMaskTemp; + // CustomCrossOver needs its own buffer as a custom implementation may call + // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere. + Vector<uint8_t> CustomCrossOverInPlaceHere; + + Vector<Mutator> Mutators; + Vector<Mutator> DefaultMutators; + Vector<Mutator> CurrentMutatorSequence; +}; + +} // namespace fuzzer + +#endif // LLVM_FUZZER_MUTATE_H diff --git a/tools/fuzzing/libfuzzer/FuzzerOptions.h b/tools/fuzzing/libfuzzer/FuzzerOptions.h new file mode 100644 index 0000000000..9d975bd61f --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerOptions.h @@ -0,0 +1,85 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// fuzzer::FuzzingOptions +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_OPTIONS_H +#define LLVM_FUZZER_OPTIONS_H + +#include "FuzzerDefs.h" + +namespace fuzzer { + +struct FuzzingOptions { + int Verbosity = 1; + size_t MaxLen = 0; + size_t LenControl = 1000; + int UnitTimeoutSec = 300; + int TimeoutExitCode = 70; + int OOMExitCode = 71; + int InterruptExitCode = 72; + int ErrorExitCode = 77; + bool IgnoreTimeouts = true; + bool IgnoreOOMs = true; + bool IgnoreCrashes = false; + int MaxTotalTimeSec = 0; + int RssLimitMb = 0; + int MallocLimitMb = 0; + bool DoCrossOver = true; + int MutateDepth = 5; + bool ReduceDepth = false; + bool UseCounters = false; + bool UseMemmem = true; + bool UseCmp = false; + int UseValueProfile = false; + bool Shrink = false; + bool ReduceInputs = false; + int ReloadIntervalSec = 1; + bool ShuffleAtStartUp = true; + bool PreferSmall = true; + size_t MaxNumberOfRuns = -1L; + int ReportSlowUnits = 10; + bool OnlyASCII = false; + bool Entropic = false; + size_t EntropicFeatureFrequencyThreshold = 0xFF; + size_t EntropicNumberOfRarestFeatures = 100; + std::string OutputCorpus; + std::string ArtifactPrefix = "./"; + std::string ExactArtifactPath; + std::string ExitOnSrcPos; + std::string ExitOnItem; + std::string FocusFunction; + std::string DataFlowTrace; + std::string CollectDataFlow; + std::string FeaturesDir; + std::string StopFile; + bool SaveArtifacts = true; + bool PrintNEW = true; // Print a status line when new units are found; + bool PrintNewCovPcs = false; + int PrintNewCovFuncs = 0; + bool PrintFinalStats = false; + bool PrintCorpusStats = false; + bool PrintCoverage = false; + bool DumpCoverage = false; + bool DetectLeaks = true; + int PurgeAllocatorIntervalSec = 1; + int TraceMalloc = 0; + bool HandleAbrt = false; + bool HandleBus = false; + bool HandleFpe = false; + bool HandleIll = false; + bool HandleInt = false; + bool HandleSegv = false; + bool HandleTerm = false; + bool HandleXfsz = false; + bool HandleUsr1 = false; + bool HandleUsr2 = false; +}; + +} // namespace fuzzer + +#endif // LLVM_FUZZER_OPTIONS_H diff --git a/tools/fuzzing/libfuzzer/FuzzerPlatform.h b/tools/fuzzing/libfuzzer/FuzzerPlatform.h new file mode 100644 index 0000000000..8befdb882c --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerPlatform.h @@ -0,0 +1,163 @@ +//===-- FuzzerPlatform.h --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Common platform macros. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_PLATFORM_H +#define LLVM_FUZZER_PLATFORM_H + +// Platform detection. +#ifdef __linux__ +#define LIBFUZZER_APPLE 0 +#define LIBFUZZER_FUCHSIA 0 +#define LIBFUZZER_LINUX 1 +#define LIBFUZZER_NETBSD 0 +#define LIBFUZZER_FREEBSD 0 +#define LIBFUZZER_OPENBSD 0 +#define LIBFUZZER_WINDOWS 0 +#define LIBFUZZER_EMSCRIPTEN 0 +#elif __APPLE__ +#define LIBFUZZER_APPLE 1 +#define LIBFUZZER_FUCHSIA 0 +#define LIBFUZZER_LINUX 0 +#define LIBFUZZER_NETBSD 0 +#define LIBFUZZER_FREEBSD 0 +#define LIBFUZZER_OPENBSD 0 +#define LIBFUZZER_WINDOWS 0 +#define LIBFUZZER_EMSCRIPTEN 0 +#elif __NetBSD__ +#define LIBFUZZER_APPLE 0 +#define LIBFUZZER_FUCHSIA 0 +#define LIBFUZZER_LINUX 0 +#define LIBFUZZER_NETBSD 1 +#define LIBFUZZER_FREEBSD 0 +#define LIBFUZZER_OPENBSD 0 +#define LIBFUZZER_WINDOWS 0 +#define LIBFUZZER_EMSCRIPTEN 0 +#elif __FreeBSD__ +#define LIBFUZZER_APPLE 0 +#define LIBFUZZER_FUCHSIA 0 +#define LIBFUZZER_LINUX 0 +#define LIBFUZZER_NETBSD 0 +#define LIBFUZZER_FREEBSD 1 +#define LIBFUZZER_OPENBSD 0 +#define LIBFUZZER_WINDOWS 0 +#define LIBFUZZER_EMSCRIPTEN 0 +#elif __OpenBSD__ +#define LIBFUZZER_APPLE 0 +#define LIBFUZZER_FUCHSIA 0 +#define LIBFUZZER_LINUX 0 +#define LIBFUZZER_NETBSD 0 +#define LIBFUZZER_FREEBSD 0 +#define LIBFUZZER_OPENBSD 1 +#define LIBFUZZER_WINDOWS 0 +#define LIBFUZZER_EMSCRIPTEN 0 +#elif _WIN32 +#define LIBFUZZER_APPLE 0 +#define LIBFUZZER_FUCHSIA 0 +#define LIBFUZZER_LINUX 0 +#define LIBFUZZER_NETBSD 0 +#define LIBFUZZER_FREEBSD 0 +#define LIBFUZZER_OPENBSD 0 +#define LIBFUZZER_WINDOWS 1 +#define LIBFUZZER_EMSCRIPTEN 0 +#elif __Fuchsia__ +#define LIBFUZZER_APPLE 0 +#define LIBFUZZER_FUCHSIA 1 +#define LIBFUZZER_LINUX 0 +#define LIBFUZZER_NETBSD 0 +#define LIBFUZZER_FREEBSD 0 +#define LIBFUZZER_OPENBSD 0 +#define LIBFUZZER_WINDOWS 0 +#define LIBFUZZER_EMSCRIPTEN 0 +#elif __EMSCRIPTEN__ +#define LIBFUZZER_APPLE 0 +#define LIBFUZZER_FUCHSIA 0 +#define LIBFUZZER_LINUX 0 +#define LIBFUZZER_NETBSD 0 +#define LIBFUZZER_FREEBSD 0 +#define LIBFUZZER_OPENBSD 0 +#define LIBFUZZER_WINDOWS 0 +#define LIBFUZZER_EMSCRIPTEN 1 +#else +#error "Support for your platform has not been implemented" +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +// MSVC compiler is being used. +#define LIBFUZZER_MSVC 1 +#else +#define LIBFUZZER_MSVC 0 +#endif + +#ifndef __has_attribute +#define __has_attribute(x) 0 +#endif + +#define LIBFUZZER_POSIX \ + (LIBFUZZER_APPLE || LIBFUZZER_LINUX || LIBFUZZER_NETBSD || \ + LIBFUZZER_FREEBSD || LIBFUZZER_OPENBSD || LIBFUZZER_EMSCRIPTEN) + +#ifdef __x86_64 +#if __has_attribute(target) +#define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) +#else +#define ATTRIBUTE_TARGET_POPCNT +#endif +#else +#define ATTRIBUTE_TARGET_POPCNT +#endif + +#ifdef __clang__ // avoid gcc warning. +#if __has_attribute(no_sanitize) +#define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +#else +#define ATTRIBUTE_NO_SANITIZE_MEMORY +#endif +#define ALWAYS_INLINE __attribute__((always_inline)) +#else +#define ATTRIBUTE_NO_SANITIZE_MEMORY +#define ALWAYS_INLINE +#endif // __clang__ + +#if LIBFUZZER_WINDOWS +#define ATTRIBUTE_NO_SANITIZE_ADDRESS +#else +#define ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address)) +#endif + +#if LIBFUZZER_WINDOWS +#define ATTRIBUTE_ALIGNED(X) __declspec(align(X)) +#define ATTRIBUTE_INTERFACE __declspec(dllexport) +// This is used for __sancov_lowest_stack which is needed for +// -fsanitize-coverage=stack-depth. That feature is not yet available on +// Windows, so make the symbol static to avoid linking errors. +#define ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC static +#define ATTRIBUTE_NOINLINE __declspec(noinline) +#else +#define ATTRIBUTE_ALIGNED(X) __attribute__((aligned(X))) +#define ATTRIBUTE_INTERFACE __attribute__((visibility("default"))) +#define ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC \ + ATTRIBUTE_INTERFACE __attribute__((tls_model("initial-exec"))) thread_local + +#define ATTRIBUTE_NOINLINE __attribute__((noinline)) +#endif + +#if defined(__has_feature) +#if __has_feature(address_sanitizer) +#define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_ADDRESS +#elif __has_feature(memory_sanitizer) +#define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_MEMORY +#else +#define ATTRIBUTE_NO_SANITIZE_ALL +#endif +#else +#define ATTRIBUTE_NO_SANITIZE_ALL +#endif + +#endif // LLVM_FUZZER_PLATFORM_H diff --git a/tools/fuzzing/libfuzzer/FuzzerRandom.h b/tools/fuzzing/libfuzzer/FuzzerRandom.h new file mode 100644 index 0000000000..659283eee2 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerRandom.h @@ -0,0 +1,38 @@ +//===- FuzzerRandom.h - Internal header for the Fuzzer ----------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// fuzzer::Random +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_RANDOM_H +#define LLVM_FUZZER_RANDOM_H + +#include <random> + +namespace fuzzer { +class Random : public std::minstd_rand { + public: + Random(unsigned int seed) : std::minstd_rand(seed) {} + result_type operator()() { return this->std::minstd_rand::operator()(); } + size_t Rand() { return this->operator()(); } + size_t RandBool() { return Rand() % 2; } + size_t SkewTowardsLast(size_t n) { + size_t T = this->operator()(n * n); + size_t Res = sqrt(T); + return Res; + } + size_t operator()(size_t n) { return n ? Rand() % n : 0; } + intptr_t operator()(intptr_t From, intptr_t To) { + assert(From < To); + intptr_t RangeSize = To - From + 1; + return operator()(RangeSize) + From; + } +}; + +} // namespace fuzzer + +#endif // LLVM_FUZZER_RANDOM_H diff --git a/tools/fuzzing/libfuzzer/FuzzerSHA1.cpp b/tools/fuzzing/libfuzzer/FuzzerSHA1.cpp new file mode 100644 index 0000000000..2005dc7003 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerSHA1.cpp @@ -0,0 +1,223 @@ +//===- FuzzerSHA1.h - Private copy of the SHA1 implementation ---*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This code is taken from public domain +// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c) +// and modified by adding anonymous namespace, adding an interface +// function fuzzer::ComputeSHA1() and removing unnecessary code. +// +// lib/Fuzzer can not use SHA1 implementation from openssl because +// openssl may not be available and because we may be fuzzing openssl itself. +// For the same reason we do not want to depend on SHA1 from LLVM tree. +//===----------------------------------------------------------------------===// + +#include "FuzzerSHA1.h" +#include "FuzzerDefs.h" +#include "FuzzerPlatform.h" + +/* This code is public-domain - it is based on libcrypt + * placed in the public domain by Wei Dai and other contributors. + */ + +#include <iomanip> +#include <sstream> +#include <stdint.h> +#include <string.h> + +namespace { // Added for LibFuzzer + +#ifdef __BIG_ENDIAN__ +# define SHA_BIG_ENDIAN +// Windows is always little endian and MSVC doesn't have <endian.h> +#elif defined __LITTLE_ENDIAN__ || LIBFUZZER_WINDOWS +/* override */ +#elif defined __BYTE_ORDER +# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define SHA_BIG_ENDIAN +# endif +#else // ! defined __LITTLE_ENDIAN__ +# include <endian.h> // machine/endian.h +# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define SHA_BIG_ENDIAN +# endif +#endif + + +/* header */ + +#define HASH_LENGTH 20 +#define BLOCK_LENGTH 64 + +typedef struct sha1nfo { + uint32_t buffer[BLOCK_LENGTH/4]; + uint32_t state[HASH_LENGTH/4]; + uint32_t byteCount; + uint8_t bufferOffset; + uint8_t keyBuffer[BLOCK_LENGTH]; + uint8_t innerHash[HASH_LENGTH]; +} sha1nfo; + +/* public API - prototypes - TODO: doxygen*/ + +/** + */ +void sha1_init(sha1nfo *s); +/** + */ +void sha1_writebyte(sha1nfo *s, uint8_t data); +/** + */ +void sha1_write(sha1nfo *s, const char *data, size_t len); +/** + */ +uint8_t* sha1_result(sha1nfo *s); + + +/* code */ +#define SHA1_K0 0x5a827999 +#define SHA1_K20 0x6ed9eba1 +#define SHA1_K40 0x8f1bbcdc +#define SHA1_K60 0xca62c1d6 + +void sha1_init(sha1nfo *s) { + s->state[0] = 0x67452301; + s->state[1] = 0xefcdab89; + s->state[2] = 0x98badcfe; + s->state[3] = 0x10325476; + s->state[4] = 0xc3d2e1f0; + s->byteCount = 0; + s->bufferOffset = 0; +} + +uint32_t sha1_rol32(uint32_t number, uint8_t bits) { + return ((number << bits) | (number >> (32-bits))); +} + +void sha1_hashBlock(sha1nfo *s) { + uint8_t i; + uint32_t a,b,c,d,e,t; + + a=s->state[0]; + b=s->state[1]; + c=s->state[2]; + d=s->state[3]; + e=s->state[4]; + for (i=0; i<80; i++) { + if (i>=16) { + t = s->buffer[(i+13)&15] ^ s->buffer[(i+8)&15] ^ s->buffer[(i+2)&15] ^ s->buffer[i&15]; + s->buffer[i&15] = sha1_rol32(t,1); + } + if (i<20) { + t = (d ^ (b & (c ^ d))) + SHA1_K0; + } else if (i<40) { + t = (b ^ c ^ d) + SHA1_K20; + } else if (i<60) { + t = ((b & c) | (d & (b | c))) + SHA1_K40; + } else { + t = (b ^ c ^ d) + SHA1_K60; + } + t+=sha1_rol32(a,5) + e + s->buffer[i&15]; + e=d; + d=c; + c=sha1_rol32(b,30); + b=a; + a=t; + } + s->state[0] += a; + s->state[1] += b; + s->state[2] += c; + s->state[3] += d; + s->state[4] += e; +} + +void sha1_addUncounted(sha1nfo *s, uint8_t data) { + uint8_t * const b = (uint8_t*) s->buffer; +#ifdef SHA_BIG_ENDIAN + b[s->bufferOffset] = data; +#else + b[s->bufferOffset ^ 3] = data; +#endif + s->bufferOffset++; + if (s->bufferOffset == BLOCK_LENGTH) { + sha1_hashBlock(s); + s->bufferOffset = 0; + } +} + +void sha1_writebyte(sha1nfo *s, uint8_t data) { + ++s->byteCount; + sha1_addUncounted(s, data); +} + +void sha1_write(sha1nfo *s, const char *data, size_t len) { + for (;len--;) sha1_writebyte(s, (uint8_t) *data++); +} + +void sha1_pad(sha1nfo *s) { + // Implement SHA-1 padding (fips180-2 §5.1.1) + + // Pad with 0x80 followed by 0x00 until the end of the block + sha1_addUncounted(s, 0x80); + while (s->bufferOffset != 56) sha1_addUncounted(s, 0x00); + + // Append length in the last 8 bytes + sha1_addUncounted(s, 0); // We're only using 32 bit lengths + sha1_addUncounted(s, 0); // But SHA-1 supports 64 bit lengths + sha1_addUncounted(s, 0); // So zero pad the top bits + sha1_addUncounted(s, s->byteCount >> 29); // Shifting to multiply by 8 + sha1_addUncounted(s, s->byteCount >> 21); // as SHA-1 supports bitstreams as well as + sha1_addUncounted(s, s->byteCount >> 13); // byte. + sha1_addUncounted(s, s->byteCount >> 5); + sha1_addUncounted(s, s->byteCount << 3); +} + +uint8_t* sha1_result(sha1nfo *s) { + // Pad to complete the last block + sha1_pad(s); + +#ifndef SHA_BIG_ENDIAN + // Swap byte order back + int i; + for (i=0; i<5; i++) { + s->state[i]= + (((s->state[i])<<24)& 0xff000000) + | (((s->state[i])<<8) & 0x00ff0000) + | (((s->state[i])>>8) & 0x0000ff00) + | (((s->state[i])>>24)& 0x000000ff); + } +#endif + + // Return pointer to hash (20 characters) + return (uint8_t*) s->state; +} + +} // namespace; Added for LibFuzzer + +namespace fuzzer { + +// The rest is added for LibFuzzer +void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out) { + sha1nfo s; + sha1_init(&s); + sha1_write(&s, (const char*)Data, Len); + memcpy(Out, sha1_result(&s), HASH_LENGTH); +} + +std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes]) { + std::stringstream SS; + for (int i = 0; i < kSHA1NumBytes; i++) + SS << std::hex << std::setfill('0') << std::setw(2) << (unsigned)Sha1[i]; + return SS.str(); +} + +std::string Hash(const Unit &U) { + uint8_t Hash[kSHA1NumBytes]; + ComputeSHA1(U.data(), U.size(), Hash); + return Sha1ToString(Hash); +} + +} diff --git a/tools/fuzzing/libfuzzer/FuzzerSHA1.h b/tools/fuzzing/libfuzzer/FuzzerSHA1.h new file mode 100644 index 0000000000..05cbacda87 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerSHA1.h @@ -0,0 +1,32 @@ +//===- FuzzerSHA1.h - Internal header for the SHA1 utils --------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// SHA1 utils. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_SHA1_H +#define LLVM_FUZZER_SHA1_H + +#include "FuzzerDefs.h" +#include <cstddef> +#include <stdint.h> + +namespace fuzzer { + +// Private copy of SHA1 implementation. +static const int kSHA1NumBytes = 20; + +// Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'. +void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out); + +std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes]); + +std::string Hash(const Unit &U); + +} // namespace fuzzer + +#endif // LLVM_FUZZER_SHA1_H diff --git a/tools/fuzzing/libfuzzer/FuzzerTracePC.cpp b/tools/fuzzing/libfuzzer/FuzzerTracePC.cpp new file mode 100644 index 0000000000..fbceda39bc --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerTracePC.cpp @@ -0,0 +1,657 @@ +//===- FuzzerTracePC.cpp - PC tracing--------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Trace PCs. +// This module implements __sanitizer_cov_trace_pc_guard[_init], +// the callback required for -fsanitize-coverage=trace-pc-guard instrumentation. +// +//===----------------------------------------------------------------------===// + +#include "FuzzerTracePC.h" +#include "FuzzerBuiltins.h" +#include "FuzzerBuiltinsMsvc.h" +#include "FuzzerCorpus.h" +#include "FuzzerDefs.h" +#include "FuzzerDictionary.h" +#include "FuzzerExtFunctions.h" +#include "FuzzerIO.h" +#include "FuzzerPlatform.h" +#include "FuzzerUtil.h" +#include "FuzzerValueBitMap.h" +#include <set> + +// Used by -fsanitize-coverage=stack-depth to track stack depth +ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC uintptr_t __sancov_lowest_stack; + +namespace fuzzer { + +TracePC TPC; + +size_t TracePC::GetTotalPCCoverage() { + return ObservedPCs.size(); +} + + +void TracePC::HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop) { + if (Start == Stop) return; + if (NumModules && + Modules[NumModules - 1].Start() == Start) + return; + assert(NumModules < + sizeof(Modules) / sizeof(Modules[0])); + auto &M = Modules[NumModules++]; + uint8_t *AlignedStart = RoundUpByPage(Start); + uint8_t *AlignedStop = RoundDownByPage(Stop); + size_t NumFullPages = AlignedStop > AlignedStart ? + (AlignedStop - AlignedStart) / PageSize() : 0; + bool NeedFirst = Start < AlignedStart || !NumFullPages; + bool NeedLast = Stop > AlignedStop && AlignedStop >= AlignedStart; + M.NumRegions = NumFullPages + NeedFirst + NeedLast;; + assert(M.NumRegions > 0); + M.Regions = new Module::Region[M.NumRegions]; + assert(M.Regions); + size_t R = 0; + if (NeedFirst) + M.Regions[R++] = {Start, std::min(Stop, AlignedStart), true, false}; + for (uint8_t *P = AlignedStart; P < AlignedStop; P += PageSize()) + M.Regions[R++] = {P, P + PageSize(), true, true}; + if (NeedLast) + M.Regions[R++] = {AlignedStop, Stop, true, false}; + assert(R == M.NumRegions); + assert(M.Size() == (size_t)(Stop - Start)); + assert(M.Stop() == Stop); + assert(M.Start() == Start); + NumInline8bitCounters += M.Size(); +} + +void TracePC::HandlePCsInit(const uintptr_t *Start, const uintptr_t *Stop) { + const PCTableEntry *B = reinterpret_cast<const PCTableEntry *>(Start); + const PCTableEntry *E = reinterpret_cast<const PCTableEntry *>(Stop); + if (NumPCTables && ModulePCTable[NumPCTables - 1].Start == B) return; + assert(NumPCTables < sizeof(ModulePCTable) / sizeof(ModulePCTable[0])); + ModulePCTable[NumPCTables++] = {B, E}; + NumPCsInPCTables += E - B; +} + +void TracePC::PrintModuleInfo() { + if (NumModules) { + Printf("INFO: Loaded %zd modules (%zd inline 8-bit counters): ", + NumModules, NumInline8bitCounters); + for (size_t i = 0; i < NumModules; i++) + Printf("%zd [%p, %p), ", Modules[i].Size(), Modules[i].Start(), + Modules[i].Stop()); + Printf("\n"); + } + if (NumPCTables) { + Printf("INFO: Loaded %zd PC tables (%zd PCs): ", NumPCTables, + NumPCsInPCTables); + for (size_t i = 0; i < NumPCTables; i++) { + Printf("%zd [%p,%p), ", ModulePCTable[i].Stop - ModulePCTable[i].Start, + ModulePCTable[i].Start, ModulePCTable[i].Stop); + } + Printf("\n"); + + if (NumInline8bitCounters && NumInline8bitCounters != NumPCsInPCTables) { + Printf("ERROR: The size of coverage PC tables does not match the\n" + "number of instrumented PCs. This might be a compiler bug,\n" + "please contact the libFuzzer developers.\n" + "Also check https://bugs.llvm.org/show_bug.cgi?id=34636\n" + "for possible workarounds (tl;dr: don't use the old GNU ld)\n"); + _Exit(1); + } + } + if (size_t NumExtraCounters = ExtraCountersEnd() - ExtraCountersBegin()) + Printf("INFO: %zd Extra Counters\n", NumExtraCounters); +} + +ATTRIBUTE_NO_SANITIZE_ALL +void TracePC::HandleCallerCallee(uintptr_t Caller, uintptr_t Callee) { + const uintptr_t kBits = 12; + const uintptr_t kMask = (1 << kBits) - 1; + uintptr_t Idx = (Caller & kMask) | ((Callee & kMask) << kBits); + ValueProfileMap.AddValueModPrime(Idx); +} + +/// \return the address of the previous instruction. +/// Note: the logic is copied from `sanitizer_common/sanitizer_stacktrace.h` +inline ALWAYS_INLINE uintptr_t GetPreviousInstructionPc(uintptr_t PC) { +#if defined(__arm__) + // T32 (Thumb) branch instructions might be 16 or 32 bit long, + // so we return (pc-2) in that case in order to be safe. + // For A32 mode we return (pc-4) because all instructions are 32 bit long. + return (PC - 3) & (~1); +#elif defined(__powerpc__) || defined(__powerpc64__) || defined(__aarch64__) + // PCs are always 4 byte aligned. + return PC - 4; +#elif defined(__sparc__) || defined(__mips__) + return PC - 8; +#else + return PC - 1; +#endif +} + +/// \return the address of the next instruction. +/// Note: the logic is copied from `sanitizer_common/sanitizer_stacktrace.cpp` +ALWAYS_INLINE uintptr_t TracePC::GetNextInstructionPc(uintptr_t PC) { +#if defined(__mips__) + return PC + 8; +#elif defined(__powerpc__) || defined(__sparc__) || defined(__arm__) || \ + defined(__aarch64__) + return PC + 4; +#else + return PC + 1; +#endif +} + +void TracePC::UpdateObservedPCs() { + Vector<uintptr_t> CoveredFuncs; + auto ObservePC = [&](const PCTableEntry *TE) { + if (ObservedPCs.insert(TE).second && DoPrintNewPCs) { + PrintPC("\tNEW_PC: %p %F %L", "\tNEW_PC: %p", + GetNextInstructionPc(TE->PC)); + Printf("\n"); + } + }; + + auto Observe = [&](const PCTableEntry *TE) { + if (PcIsFuncEntry(TE)) + if (++ObservedFuncs[TE->PC] == 1 && NumPrintNewFuncs) + CoveredFuncs.push_back(TE->PC); + ObservePC(TE); + }; + + if (NumPCsInPCTables) { + if (NumInline8bitCounters == NumPCsInPCTables) { + for (size_t i = 0; i < NumModules; i++) { + auto &M = Modules[i]; + assert(M.Size() == + (size_t)(ModulePCTable[i].Stop - ModulePCTable[i].Start)); + for (size_t r = 0; r < M.NumRegions; r++) { + auto &R = M.Regions[r]; + if (!R.Enabled) continue; + for (uint8_t *P = R.Start; P < R.Stop; P++) + if (*P) + Observe(&ModulePCTable[i].Start[M.Idx(P)]); + } + } + } + } + + for (size_t i = 0, N = Min(CoveredFuncs.size(), NumPrintNewFuncs); i < N; + i++) { + Printf("\tNEW_FUNC[%zd/%zd]: ", i + 1, CoveredFuncs.size()); + PrintPC("%p %F %L", "%p", GetNextInstructionPc(CoveredFuncs[i])); + Printf("\n"); + } +} + +uintptr_t TracePC::PCTableEntryIdx(const PCTableEntry *TE) { + size_t TotalTEs = 0; + for (size_t i = 0; i < NumPCTables; i++) { + auto &M = ModulePCTable[i]; + if (TE >= M.Start && TE < M.Stop) + return TotalTEs + TE - M.Start; + TotalTEs += M.Stop - M.Start; + } + assert(0); + return 0; +} + +const TracePC::PCTableEntry *TracePC::PCTableEntryByIdx(uintptr_t Idx) { + for (size_t i = 0; i < NumPCTables; i++) { + auto &M = ModulePCTable[i]; + size_t Size = M.Stop - M.Start; + if (Idx < Size) return &M.Start[Idx]; + Idx -= Size; + } + return nullptr; +} + +static std::string GetModuleName(uintptr_t PC) { + char ModulePathRaw[4096] = ""; // What's PATH_MAX in portable C++? + void *OffsetRaw = nullptr; + if (!EF->__sanitizer_get_module_and_offset_for_pc( + reinterpret_cast<void *>(PC), ModulePathRaw, + sizeof(ModulePathRaw), &OffsetRaw)) + return ""; + return ModulePathRaw; +} + +template<class CallBack> +void TracePC::IterateCoveredFunctions(CallBack CB) { + for (size_t i = 0; i < NumPCTables; i++) { + auto &M = ModulePCTable[i]; + assert(M.Start < M.Stop); + auto ModuleName = GetModuleName(M.Start->PC); + for (auto NextFE = M.Start; NextFE < M.Stop; ) { + auto FE = NextFE; + assert(PcIsFuncEntry(FE) && "Not a function entry point"); + do { + NextFE++; + } while (NextFE < M.Stop && !(PcIsFuncEntry(NextFE))); + CB(FE, NextFE, ObservedFuncs[FE->PC]); + } + } +} + +int TracePC::SetFocusFunction(const std::string &FuncName) { + // This function should be called once. + assert(!FocusFunctionCounterPtr); + // "auto" is not a valid function name. If this function is called with "auto" + // that means the auto focus functionality failed. + if (FuncName.empty() || FuncName == "auto") + return 0; + for (size_t M = 0; M < NumModules; M++) { + auto &PCTE = ModulePCTable[M]; + size_t N = PCTE.Stop - PCTE.Start; + for (size_t I = 0; I < N; I++) { + if (!(PcIsFuncEntry(&PCTE.Start[I]))) continue; // not a function entry. + auto Name = DescribePC("%F", GetNextInstructionPc(PCTE.Start[I].PC)); + if (Name[0] == 'i' && Name[1] == 'n' && Name[2] == ' ') + Name = Name.substr(3, std::string::npos); + if (FuncName != Name) continue; + Printf("INFO: Focus function is set to '%s'\n", Name.c_str()); + FocusFunctionCounterPtr = Modules[M].Start() + I; + return 0; + } + } + + Printf("ERROR: Failed to set focus function. Make sure the function name is " + "valid (%s) and symbolization is enabled.\n", FuncName.c_str()); + return 1; +} + +bool TracePC::ObservedFocusFunction() { + return FocusFunctionCounterPtr && *FocusFunctionCounterPtr; +} + +void TracePC::PrintCoverage() { + if (!EF->__sanitizer_symbolize_pc || + !EF->__sanitizer_get_module_and_offset_for_pc) { + Printf("INFO: __sanitizer_symbolize_pc or " + "__sanitizer_get_module_and_offset_for_pc is not available," + " not printing coverage\n"); + return; + } + Printf("COVERAGE:\n"); + auto CoveredFunctionCallback = [&](const PCTableEntry *First, + const PCTableEntry *Last, + uintptr_t Counter) { + assert(First < Last); + auto VisualizePC = GetNextInstructionPc(First->PC); + std::string FileStr = DescribePC("%s", VisualizePC); + if (!IsInterestingCoverageFile(FileStr)) + return; + std::string FunctionStr = DescribePC("%F", VisualizePC); + if (FunctionStr.find("in ") == 0) + FunctionStr = FunctionStr.substr(3); + std::string LineStr = DescribePC("%l", VisualizePC); + size_t NumEdges = Last - First; + Vector<uintptr_t> UncoveredPCs; + for (auto TE = First; TE < Last; TE++) + if (!ObservedPCs.count(TE)) + UncoveredPCs.push_back(TE->PC); + Printf("%sCOVERED_FUNC: hits: %zd", Counter ? "" : "UN", Counter); + Printf(" edges: %zd/%zd", NumEdges - UncoveredPCs.size(), NumEdges); + Printf(" %s %s:%s\n", FunctionStr.c_str(), FileStr.c_str(), + LineStr.c_str()); + if (Counter) + for (auto PC : UncoveredPCs) + Printf(" UNCOVERED_PC: %s\n", + DescribePC("%s:%l", GetNextInstructionPc(PC)).c_str()); + }; + + IterateCoveredFunctions(CoveredFunctionCallback); +} + +// Value profile. +// We keep track of various values that affect control flow. +// These values are inserted into a bit-set-based hash map. +// Every new bit in the map is treated as a new coverage. +// +// For memcmp/strcmp/etc the interesting value is the length of the common +// prefix of the parameters. +// For cmp instructions the interesting value is a XOR of the parameters. +// The interesting value is mixed up with the PC and is then added to the map. + +ATTRIBUTE_NO_SANITIZE_ALL +void TracePC::AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2, + size_t n, bool StopAtZero) { + if (!n) return; + size_t Len = std::min(n, Word::GetMaxSize()); + const uint8_t *A1 = reinterpret_cast<const uint8_t *>(s1); + const uint8_t *A2 = reinterpret_cast<const uint8_t *>(s2); + uint8_t B1[Word::kMaxSize]; + uint8_t B2[Word::kMaxSize]; + // Copy the data into locals in this non-msan-instrumented function + // to avoid msan complaining further. + size_t Hash = 0; // Compute some simple hash of both strings. + for (size_t i = 0; i < Len; i++) { + B1[i] = A1[i]; + B2[i] = A2[i]; + size_t T = B1[i]; + Hash ^= (T << 8) | B2[i]; + } + size_t I = 0; + uint8_t HammingDistance = 0; + for (; I < Len; I++) { + if (B1[I] != B2[I] || (StopAtZero && B1[I] == 0)) { + HammingDistance = Popcountll(B1[I] ^ B2[I]); + break; + } + } + size_t PC = reinterpret_cast<size_t>(caller_pc); + size_t Idx = (PC & 4095) | (I << 12); + Idx += HammingDistance; + ValueProfileMap.AddValue(Idx); + TORCW.Insert(Idx ^ Hash, Word(B1, Len), Word(B2, Len)); +} + +template <class T> +ATTRIBUTE_TARGET_POPCNT ALWAYS_INLINE +ATTRIBUTE_NO_SANITIZE_ALL +void TracePC::HandleCmp(uintptr_t PC, T Arg1, T Arg2) { + uint64_t ArgXor = Arg1 ^ Arg2; + if (sizeof(T) == 4) + TORC4.Insert(ArgXor, Arg1, Arg2); + else if (sizeof(T) == 8) + TORC8.Insert(ArgXor, Arg1, Arg2); + uint64_t HammingDistance = Popcountll(ArgXor); // [0,64] + uint64_t AbsoluteDistance = (Arg1 == Arg2 ? 0 : Clzll(Arg1 - Arg2) + 1); + ValueProfileMap.AddValue(PC * 128 + HammingDistance); + ValueProfileMap.AddValue(PC * 128 + 64 + AbsoluteDistance); +} + +static size_t InternalStrnlen(const char *S, size_t MaxLen) { + size_t Len = 0; + for (; Len < MaxLen && S[Len]; Len++) {} + return Len; +} + +// Finds min of (strlen(S1), strlen(S2)). +// Needed bacause one of these strings may actually be non-zero terminated. +static size_t InternalStrnlen2(const char *S1, const char *S2) { + size_t Len = 0; + for (; S1[Len] && S2[Len]; Len++) {} + return Len; +} + +void TracePC::ClearInlineCounters() { + IterateCounterRegions([](const Module::Region &R){ + if (R.Enabled) + memset(R.Start, 0, R.Stop - R.Start); + }); +} + +ATTRIBUTE_NO_SANITIZE_ALL +void TracePC::RecordInitialStack() { + int stack; + __sancov_lowest_stack = InitialStack = reinterpret_cast<uintptr_t>(&stack); +} + +uintptr_t TracePC::GetMaxStackOffset() const { + return InitialStack - __sancov_lowest_stack; // Stack grows down +} + +void WarnAboutDeprecatedInstrumentation(const char *flag) { + // Use RawPrint because Printf cannot be used on Windows before OutputFile is + // initialized. + RawPrint(flag); + RawPrint( + " is no longer supported by libFuzzer.\n" + "Please either migrate to a compiler that supports -fsanitize=fuzzer\n" + "or use an older version of libFuzzer\n"); + exit(1); +} + +} // namespace fuzzer + +extern "C" { +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +void __sanitizer_cov_trace_pc_guard(uint32_t *Guard) { + fuzzer::WarnAboutDeprecatedInstrumentation( + "-fsanitize-coverage=trace-pc-guard"); +} + +// Best-effort support for -fsanitize-coverage=trace-pc, which is available +// in both Clang and GCC. +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +void __sanitizer_cov_trace_pc() { + fuzzer::WarnAboutDeprecatedInstrumentation("-fsanitize-coverage=trace-pc"); +} + +ATTRIBUTE_INTERFACE +void __sanitizer_cov_trace_pc_guard_init(uint32_t *Start, uint32_t *Stop) { + fuzzer::WarnAboutDeprecatedInstrumentation( + "-fsanitize-coverage=trace-pc-guard"); +} + +ATTRIBUTE_INTERFACE +void __sanitizer_cov_8bit_counters_init(uint8_t *Start, uint8_t *Stop) { + fuzzer::TPC.HandleInline8bitCountersInit(Start, Stop); +} + +ATTRIBUTE_INTERFACE +void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, + const uintptr_t *pcs_end) { + fuzzer::TPC.HandlePCsInit(pcs_beg, pcs_end); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +void __sanitizer_cov_trace_pc_indir(uintptr_t Callee) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCallerCallee(PC, Callee); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +// Now the __sanitizer_cov_trace_const_cmp[1248] callbacks just mimic +// the behaviour of __sanitizer_cov_trace_cmp[1248] ones. This, however, +// should be changed later to make full use of instrumentation. +void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) { + uint64_t N = Cases[0]; + uint64_t ValSizeInBits = Cases[1]; + uint64_t *Vals = Cases + 2; + // Skip the most common and the most boring case: all switch values are small. + // We may want to skip this at compile-time, but it will make the + // instrumentation less general. + if (Vals[N - 1] < 256) + return; + // Also skip small inputs values, they won't give good signal. + if (Val < 256) + return; + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + size_t i; + uint64_t Smaller = 0; + uint64_t Larger = ~(uint64_t)0; + // Find two switch values such that Smaller < Val < Larger. + // Use 0 and 0xfff..f as the defaults. + for (i = 0; i < N; i++) { + if (Val < Vals[i]) { + Larger = Vals[i]; + break; + } + if (Val > Vals[i]) Smaller = Vals[i]; + } + + // Apply HandleCmp to {Val,Smaller} and {Val, Larger}, + // use i as the PC modifier for HandleCmp. + if (ValSizeInBits == 16) { + fuzzer::TPC.HandleCmp(PC + 2 * i, static_cast<uint16_t>(Val), + (uint16_t)(Smaller)); + fuzzer::TPC.HandleCmp(PC + 2 * i + 1, static_cast<uint16_t>(Val), + (uint16_t)(Larger)); + } else if (ValSizeInBits == 32) { + fuzzer::TPC.HandleCmp(PC + 2 * i, static_cast<uint32_t>(Val), + (uint32_t)(Smaller)); + fuzzer::TPC.HandleCmp(PC + 2 * i + 1, static_cast<uint32_t>(Val), + (uint32_t)(Larger)); + } else { + fuzzer::TPC.HandleCmp(PC + 2*i, Val, Smaller); + fuzzer::TPC.HandleCmp(PC + 2*i + 1, Val, Larger); + } +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_div4(uint32_t Val) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Val, (uint32_t)0); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_div8(uint64_t Val) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Val, (uint64_t)0); +} + +ATTRIBUTE_INTERFACE +ATTRIBUTE_NO_SANITIZE_ALL +ATTRIBUTE_TARGET_POPCNT +void __sanitizer_cov_trace_gep(uintptr_t Idx) { + uintptr_t PC = reinterpret_cast<uintptr_t>(GET_CALLER_PC()); + fuzzer::TPC.HandleCmp(PC, Idx, (uintptr_t)0); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1, + const void *s2, size_t n, int result) { + if (!fuzzer::RunningUserCallback) return; + if (result == 0) return; // No reason to mutate. + if (n <= 1) return; // Not interesting. + fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/false); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1, + const char *s2, size_t n, int result) { + if (!fuzzer::RunningUserCallback) return; + if (result == 0) return; // No reason to mutate. + size_t Len1 = fuzzer::InternalStrnlen(s1, n); + size_t Len2 = fuzzer::InternalStrnlen(s2, n); + n = std::min(n, Len1); + n = std::min(n, Len2); + if (n <= 1) return; // Not interesting. + fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/true); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1, + const char *s2, int result) { + if (!fuzzer::RunningUserCallback) return; + if (result == 0) return; // No reason to mutate. + size_t N = fuzzer::InternalStrnlen2(s1, s2); + if (N <= 1) return; // Not interesting. + fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, N, /*StopAtZero*/true); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1, + const char *s2, size_t n, int result) { + if (!fuzzer::RunningUserCallback) return; + return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1, + const char *s2, int result) { + if (!fuzzer::RunningUserCallback) return; + return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1, + const char *s2, char *result) { + if (!fuzzer::RunningUserCallback) return; + fuzzer::TPC.MMT.Add(reinterpret_cast<const uint8_t *>(s2), strlen(s2)); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1, + const char *s2, char *result) { + if (!fuzzer::RunningUserCallback) return; + fuzzer::TPC.MMT.Add(reinterpret_cast<const uint8_t *>(s2), strlen(s2)); +} + +ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY +void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1, + const void *s2, size_t len2, void *result) { + if (!fuzzer::RunningUserCallback) return; + fuzzer::TPC.MMT.Add(reinterpret_cast<const uint8_t *>(s2), len2); +} +} // extern "C" diff --git a/tools/fuzzing/libfuzzer/FuzzerTracePC.h b/tools/fuzzing/libfuzzer/FuzzerTracePC.h new file mode 100644 index 0000000000..b46ebb909d --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerTracePC.h @@ -0,0 +1,289 @@ +//===- FuzzerTracePC.h - Internal header for the Fuzzer ---------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// fuzzer::TracePC +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_TRACE_PC +#define LLVM_FUZZER_TRACE_PC + +#include "FuzzerDefs.h" +#include "FuzzerDictionary.h" +#include "FuzzerValueBitMap.h" + +#include <set> +#include <unordered_map> + +namespace fuzzer { + +// TableOfRecentCompares (TORC) remembers the most recently performed +// comparisons of type T. +// We record the arguments of CMP instructions in this table unconditionally +// because it seems cheaper this way than to compute some expensive +// conditions inside __sanitizer_cov_trace_cmp*. +// After the unit has been executed we may decide to use the contents of +// this table to populate a Dictionary. +template<class T, size_t kSizeT> +struct TableOfRecentCompares { + static const size_t kSize = kSizeT; + struct Pair { + T A, B; + }; + ATTRIBUTE_NO_SANITIZE_ALL + void Insert(size_t Idx, const T &Arg1, const T &Arg2) { + Idx = Idx % kSize; + Table[Idx].A = Arg1; + Table[Idx].B = Arg2; + } + + Pair Get(size_t I) { return Table[I % kSize]; } + + Pair Table[kSize]; +}; + +template <size_t kSizeT> +struct MemMemTable { + static const size_t kSize = kSizeT; + Word MemMemWords[kSize]; + Word EmptyWord; + + void Add(const uint8_t *Data, size_t Size) { + if (Size <= 2) return; + Size = std::min(Size, Word::GetMaxSize()); + size_t Idx = SimpleFastHash(Data, Size) % kSize; + MemMemWords[Idx].Set(Data, Size); + } + const Word &Get(size_t Idx) { + for (size_t i = 0; i < kSize; i++) { + const Word &W = MemMemWords[(Idx + i) % kSize]; + if (W.size()) return W; + } + EmptyWord.Set(nullptr, 0); + return EmptyWord; + } +}; + +class TracePC { + public: + void HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop); + void HandlePCsInit(const uintptr_t *Start, const uintptr_t *Stop); + void HandleCallerCallee(uintptr_t Caller, uintptr_t Callee); + template <class T> void HandleCmp(uintptr_t PC, T Arg1, T Arg2); + size_t GetTotalPCCoverage(); + void SetUseCounters(bool UC) { UseCounters = UC; } + void SetUseValueProfileMask(uint32_t VPMask) { UseValueProfileMask = VPMask; } + void SetPrintNewPCs(bool P) { DoPrintNewPCs = P; } + void SetPrintNewFuncs(size_t P) { NumPrintNewFuncs = P; } + void UpdateObservedPCs(); + template <class Callback> void CollectFeatures(Callback CB) const; + + void ResetMaps() { + ValueProfileMap.Reset(); + ClearExtraCounters(); + ClearInlineCounters(); + } + + void ClearInlineCounters(); + + void UpdateFeatureSet(size_t CurrentElementIdx, size_t CurrentElementSize); + void PrintFeatureSet(); + + void PrintModuleInfo(); + + void PrintCoverage(); + + template<class CallBack> + void IterateCoveredFunctions(CallBack CB); + + void AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2, + size_t n, bool StopAtZero); + + TableOfRecentCompares<uint32_t, 32> TORC4; + TableOfRecentCompares<uint64_t, 32> TORC8; + TableOfRecentCompares<Word, 32> TORCW; + MemMemTable<1024> MMT; + + void RecordInitialStack(); + uintptr_t GetMaxStackOffset() const; + + template<class CallBack> + void ForEachObservedPC(CallBack CB) { + for (auto PC : ObservedPCs) + CB(PC); + } + + int SetFocusFunction(const std::string &FuncName); + bool ObservedFocusFunction(); + + struct PCTableEntry { + uintptr_t PC, PCFlags; + }; + + uintptr_t PCTableEntryIdx(const PCTableEntry *TE); + const PCTableEntry *PCTableEntryByIdx(uintptr_t Idx); + static uintptr_t GetNextInstructionPc(uintptr_t PC); + bool PcIsFuncEntry(const PCTableEntry *TE) { return TE->PCFlags & 1; } + +private: + bool UseCounters = false; + uint32_t UseValueProfileMask = false; + bool DoPrintNewPCs = false; + size_t NumPrintNewFuncs = 0; + + // Module represents the array of 8-bit counters split into regions + // such that every region, except maybe the first and the last one, is one + // full page. + struct Module { + struct Region { + uint8_t *Start, *Stop; + bool Enabled; + bool OneFullPage; + }; + Region *Regions; + size_t NumRegions; + uint8_t *Start() { return Regions[0].Start; } + uint8_t *Stop() { return Regions[NumRegions - 1].Stop; } + size_t Size() { return Stop() - Start(); } + size_t Idx(uint8_t *P) { + assert(P >= Start() && P < Stop()); + return P - Start(); + } + }; + + Module Modules[4096]; + size_t NumModules; // linker-initialized. + size_t NumInline8bitCounters; + + template <class Callback> + void IterateCounterRegions(Callback CB) { + for (size_t m = 0; m < NumModules; m++) + for (size_t r = 0; r < Modules[m].NumRegions; r++) + CB(Modules[m].Regions[r]); + } + + struct { const PCTableEntry *Start, *Stop; } ModulePCTable[4096]; + size_t NumPCTables; + size_t NumPCsInPCTables; + + Set<const PCTableEntry*> ObservedPCs; + std::unordered_map<uintptr_t, uintptr_t> ObservedFuncs; // PC => Counter. + + uint8_t *FocusFunctionCounterPtr = nullptr; + + ValueBitMap ValueProfileMap; + uintptr_t InitialStack; +}; + +template <class Callback> +// void Callback(size_t FirstFeature, size_t Idx, uint8_t Value); +ATTRIBUTE_NO_SANITIZE_ALL +size_t ForEachNonZeroByte(const uint8_t *Begin, const uint8_t *End, + size_t FirstFeature, Callback Handle8bitCounter) { + typedef uintptr_t LargeType; + const size_t Step = sizeof(LargeType) / sizeof(uint8_t); + const size_t StepMask = Step - 1; + auto P = Begin; + // Iterate by 1 byte until either the alignment boundary or the end. + for (; reinterpret_cast<uintptr_t>(P) & StepMask && P < End; P++) + if (uint8_t V = *P) + Handle8bitCounter(FirstFeature, P - Begin, V); + + // Iterate by Step bytes at a time. + for (; P < End; P += Step) + if (LargeType Bundle = *reinterpret_cast<const LargeType *>(P)) + for (size_t I = 0; I < Step; I++, Bundle >>= 8) + if (uint8_t V = Bundle & 0xff) + Handle8bitCounter(FirstFeature, P - Begin + I, V); + + // Iterate by 1 byte until the end. + for (; P < End; P++) + if (uint8_t V = *P) + Handle8bitCounter(FirstFeature, P - Begin, V); + return End - Begin; +} + +// Given a non-zero Counter returns a number in the range [0,7]. +template<class T> +unsigned CounterToFeature(T Counter) { + // Returns a feature number by placing Counters into buckets as illustrated + // below. + // + // Counter bucket: [1] [2] [3] [4-7] [8-15] [16-31] [32-127] [128+] + // Feature number: 0 1 2 3 4 5 6 7 + // + // This is a heuristic taken from AFL (see + // http://lcamtuf.coredump.cx/afl/technical_details.txt). + // + // This implementation may change in the future so clients should + // not rely on it. + assert(Counter); + unsigned Bit = 0; + /**/ if (Counter >= 128) Bit = 7; + else if (Counter >= 32) Bit = 6; + else if (Counter >= 16) Bit = 5; + else if (Counter >= 8) Bit = 4; + else if (Counter >= 4) Bit = 3; + else if (Counter >= 3) Bit = 2; + else if (Counter >= 2) Bit = 1; + return Bit; +} + +template <class Callback> // void Callback(size_t Feature) +ATTRIBUTE_NO_SANITIZE_ADDRESS +ATTRIBUTE_NOINLINE +void TracePC::CollectFeatures(Callback HandleFeature) const { + auto Handle8bitCounter = [&](size_t FirstFeature, + size_t Idx, uint8_t Counter) { + if (UseCounters) + HandleFeature(FirstFeature + Idx * 8 + CounterToFeature(Counter)); + else + HandleFeature(FirstFeature + Idx); + }; + + size_t FirstFeature = 0; + + for (size_t i = 0; i < NumModules; i++) { + for (size_t r = 0; r < Modules[i].NumRegions; r++) { + if (!Modules[i].Regions[r].Enabled) continue; + FirstFeature += 8 * ForEachNonZeroByte(Modules[i].Regions[r].Start, + Modules[i].Regions[r].Stop, + FirstFeature, Handle8bitCounter); + } + } + + FirstFeature += + 8 * ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), + FirstFeature, Handle8bitCounter); + + if (UseValueProfileMask) { + ValueProfileMap.ForEach([&](size_t Idx) { + HandleFeature(FirstFeature + Idx); + }); + FirstFeature += ValueProfileMap.SizeInBits(); + } + + // Step function, grows similar to 8 * Log_2(A). + auto StackDepthStepFunction = [](uint32_t A) -> uint32_t { + if (!A) return A; + uint32_t Log2 = Log(A); + if (Log2 < 3) return A; + Log2 -= 3; + return (Log2 + 1) * 8 + ((A >> Log2) & 7); + }; + assert(StackDepthStepFunction(1024) == 64); + assert(StackDepthStepFunction(1024 * 4) == 80); + assert(StackDepthStepFunction(1024 * 1024) == 144); + + if (auto MaxStackOffset = GetMaxStackOffset()) + HandleFeature(FirstFeature + StackDepthStepFunction(MaxStackOffset / 8)); +} + +extern TracePC TPC; + +} // namespace fuzzer + +#endif // LLVM_FUZZER_TRACE_PC diff --git a/tools/fuzzing/libfuzzer/FuzzerUtil.cpp b/tools/fuzzing/libfuzzer/FuzzerUtil.cpp new file mode 100644 index 0000000000..7eecb68d07 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerUtil.cpp @@ -0,0 +1,236 @@ +//===- FuzzerUtil.cpp - Misc utils ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Misc utils. +//===----------------------------------------------------------------------===// + +#include "FuzzerUtil.h" +#include "FuzzerIO.h" +#include "FuzzerInternal.h" +#include <cassert> +#include <chrono> +#include <cstring> +#include <errno.h> +#include <mutex> +#include <signal.h> +#include <sstream> +#include <stdio.h> +#include <sys/types.h> +#include <thread> + +namespace fuzzer { + +void PrintHexArray(const uint8_t *Data, size_t Size, + const char *PrintAfter) { + for (size_t i = 0; i < Size; i++) + Printf("0x%x,", (unsigned)Data[i]); + Printf("%s", PrintAfter); +} + +void Print(const Unit &v, const char *PrintAfter) { + PrintHexArray(v.data(), v.size(), PrintAfter); +} + +void PrintASCIIByte(uint8_t Byte) { + if (Byte == '\\') + Printf("\\\\"); + else if (Byte == '"') + Printf("\\\""); + else if (Byte >= 32 && Byte < 127) + Printf("%c", Byte); + else + Printf("\\x%02x", Byte); +} + +void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter) { + for (size_t i = 0; i < Size; i++) + PrintASCIIByte(Data[i]); + Printf("%s", PrintAfter); +} + +void PrintASCII(const Unit &U, const char *PrintAfter) { + PrintASCII(U.data(), U.size(), PrintAfter); +} + +bool ToASCII(uint8_t *Data, size_t Size) { + bool Changed = false; + for (size_t i = 0; i < Size; i++) { + uint8_t &X = Data[i]; + auto NewX = X; + NewX &= 127; + if (!isspace(NewX) && !isprint(NewX)) + NewX = ' '; + Changed |= NewX != X; + X = NewX; + } + return Changed; +} + +bool IsASCII(const Unit &U) { return IsASCII(U.data(), U.size()); } + +bool IsASCII(const uint8_t *Data, size_t Size) { + for (size_t i = 0; i < Size; i++) + if (!(isprint(Data[i]) || isspace(Data[i]))) return false; + return true; +} + +bool ParseOneDictionaryEntry(const std::string &Str, Unit *U) { + U->clear(); + if (Str.empty()) return false; + size_t L = 0, R = Str.size() - 1; // We are parsing the range [L,R]. + // Skip spaces from both sides. + while (L < R && isspace(Str[L])) L++; + while (R > L && isspace(Str[R])) R--; + if (R - L < 2) return false; + // Check the closing " + if (Str[R] != '"') return false; + R--; + // Find the opening " + while (L < R && Str[L] != '"') L++; + if (L >= R) return false; + assert(Str[L] == '\"'); + L++; + assert(L <= R); + for (size_t Pos = L; Pos <= R; Pos++) { + uint8_t V = (uint8_t)Str[Pos]; + if (!isprint(V) && !isspace(V)) return false; + if (V =='\\') { + // Handle '\\' + if (Pos + 1 <= R && (Str[Pos + 1] == '\\' || Str[Pos + 1] == '"')) { + U->push_back(Str[Pos + 1]); + Pos++; + continue; + } + // Handle '\xAB' + if (Pos + 3 <= R && Str[Pos + 1] == 'x' + && isxdigit(Str[Pos + 2]) && isxdigit(Str[Pos + 3])) { + char Hex[] = "0xAA"; + Hex[2] = Str[Pos + 2]; + Hex[3] = Str[Pos + 3]; + U->push_back(strtol(Hex, nullptr, 16)); + Pos += 3; + continue; + } + return false; // Invalid escape. + } else { + // Any other character. + U->push_back(V); + } + } + return true; +} + +bool ParseDictionaryFile(const std::string &Text, Vector<Unit> *Units) { + if (Text.empty()) { + Printf("ParseDictionaryFile: file does not exist or is empty\n"); + return false; + } + std::istringstream ISS(Text); + Units->clear(); + Unit U; + int LineNo = 0; + std::string S; + while (std::getline(ISS, S, '\n')) { + LineNo++; + size_t Pos = 0; + while (Pos < S.size() && isspace(S[Pos])) Pos++; // Skip spaces. + if (Pos == S.size()) continue; // Empty line. + if (S[Pos] == '#') continue; // Comment line. + if (ParseOneDictionaryEntry(S, &U)) { + Units->push_back(U); + } else { + Printf("ParseDictionaryFile: error in line %d\n\t\t%s\n", LineNo, + S.c_str()); + return false; + } + } + return true; +} + +// Code duplicated (and tested) in llvm/include/llvm/Support/Base64.h +std::string Base64(const Unit &U) { + static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + std::string Buffer; + Buffer.resize(((U.size() + 2) / 3) * 4); + + size_t i = 0, j = 0; + for (size_t n = U.size() / 3 * 3; i < n; i += 3, j += 4) { + uint32_t x = ((unsigned char)U[i] << 16) | ((unsigned char)U[i + 1] << 8) | + (unsigned char)U[i + 2]; + Buffer[j + 0] = Table[(x >> 18) & 63]; + Buffer[j + 1] = Table[(x >> 12) & 63]; + Buffer[j + 2] = Table[(x >> 6) & 63]; + Buffer[j + 3] = Table[x & 63]; + } + if (i + 1 == U.size()) { + uint32_t x = ((unsigned char)U[i] << 16); + Buffer[j + 0] = Table[(x >> 18) & 63]; + Buffer[j + 1] = Table[(x >> 12) & 63]; + Buffer[j + 2] = '='; + Buffer[j + 3] = '='; + } else if (i + 2 == U.size()) { + uint32_t x = ((unsigned char)U[i] << 16) | ((unsigned char)U[i + 1] << 8); + Buffer[j + 0] = Table[(x >> 18) & 63]; + Buffer[j + 1] = Table[(x >> 12) & 63]; + Buffer[j + 2] = Table[(x >> 6) & 63]; + Buffer[j + 3] = '='; + } + return Buffer; +} + +static std::mutex SymbolizeMutex; + +std::string DescribePC(const char *SymbolizedFMT, uintptr_t PC) { + std::unique_lock<std::mutex> l(SymbolizeMutex, std::try_to_lock); + if (!EF->__sanitizer_symbolize_pc || !l.owns_lock()) + return "<can not symbolize>"; + char PcDescr[1024] = {}; + EF->__sanitizer_symbolize_pc(reinterpret_cast<void*>(PC), + SymbolizedFMT, PcDescr, sizeof(PcDescr)); + PcDescr[sizeof(PcDescr) - 1] = 0; // Just in case. + return PcDescr; +} + +void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC) { + if (EF->__sanitizer_symbolize_pc) + Printf("%s", DescribePC(SymbolizedFMT, PC).c_str()); + else + Printf(FallbackFMT, PC); +} + +void PrintStackTrace() { + std::unique_lock<std::mutex> l(SymbolizeMutex, std::try_to_lock); + if (EF->__sanitizer_print_stack_trace && l.owns_lock()) + EF->__sanitizer_print_stack_trace(); +} + +void PrintMemoryProfile() { + std::unique_lock<std::mutex> l(SymbolizeMutex, std::try_to_lock); + if (EF->__sanitizer_print_memory_profile && l.owns_lock()) + EF->__sanitizer_print_memory_profile(95, 8); +} + +unsigned NumberOfCpuCores() { + unsigned N = std::thread::hardware_concurrency(); + if (!N) { + Printf("WARNING: std::thread::hardware_concurrency not well defined for " + "your platform. Assuming CPU count of 1.\n"); + N = 1; + } + return N; +} + +size_t SimpleFastHash(const uint8_t *Data, size_t Size) { + size_t Res = 0; + for (size_t i = 0; i < Size; i++) + Res = Res * 11 + Data[i]; + return Res; +} + +} // namespace fuzzer diff --git a/tools/fuzzing/libfuzzer/FuzzerUtil.h b/tools/fuzzing/libfuzzer/FuzzerUtil.h new file mode 100644 index 0000000000..4ae3583830 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerUtil.h @@ -0,0 +1,111 @@ +//===- FuzzerUtil.h - Internal header for the Fuzzer Utils ------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Util functions. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_UTIL_H +#define LLVM_FUZZER_UTIL_H + +#include "FuzzerBuiltins.h" +#include "FuzzerBuiltinsMsvc.h" +#include "FuzzerCommand.h" +#include "FuzzerDefs.h" + +namespace fuzzer { + +void PrintHexArray(const Unit &U, const char *PrintAfter = ""); + +void PrintHexArray(const uint8_t *Data, size_t Size, + const char *PrintAfter = ""); + +void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = ""); + +void PrintASCII(const Unit &U, const char *PrintAfter = ""); + +// Changes U to contain only ASCII (isprint+isspace) characters. +// Returns true iff U has been changed. +bool ToASCII(uint8_t *Data, size_t Size); + +bool IsASCII(const Unit &U); + +bool IsASCII(const uint8_t *Data, size_t Size); + +std::string Base64(const Unit &U); + +void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC); + +std::string DescribePC(const char *SymbolizedFMT, uintptr_t PC); + +void PrintStackTrace(); + +void PrintMemoryProfile(); + +unsigned NumberOfCpuCores(); + +// Platform specific functions. +void SetSignalHandler(const FuzzingOptions& Options); + +void SleepSeconds(int Seconds); + +unsigned long GetPid(); + +size_t GetPeakRSSMb(); + +int ExecuteCommand(const Command &Cmd); +bool ExecuteCommand(const Command &Cmd, std::string *CmdOutput); + +// Fuchsia does not have popen/pclose. +FILE *OpenProcessPipe(const char *Command, const char *Mode); +int CloseProcessPipe(FILE *F); + +const void *SearchMemory(const void *haystack, size_t haystacklen, + const void *needle, size_t needlelen); + +std::string CloneArgsWithoutX(const Vector<std::string> &Args, + const char *X1, const char *X2); + +inline std::string CloneArgsWithoutX(const Vector<std::string> &Args, + const char *X) { + return CloneArgsWithoutX(Args, X, X); +} + +inline std::pair<std::string, std::string> SplitBefore(std::string X, + std::string S) { + auto Pos = S.find(X); + if (Pos == std::string::npos) + return std::make_pair(S, ""); + return std::make_pair(S.substr(0, Pos), S.substr(Pos)); +} + +void DiscardOutput(int Fd); + +std::string DisassembleCmd(const std::string &FileName); + +std::string SearchRegexCmd(const std::string &Regex); + +size_t SimpleFastHash(const uint8_t *Data, size_t Size); + +inline uint32_t Log(uint32_t X) { return 32 - Clz(X) - 1; } + +inline size_t PageSize() { return 4096; } +inline uint8_t *RoundUpByPage(uint8_t *P) { + uintptr_t X = reinterpret_cast<uintptr_t>(P); + size_t Mask = PageSize() - 1; + X = (X + Mask) & ~Mask; + return reinterpret_cast<uint8_t *>(X); +} +inline uint8_t *RoundDownByPage(uint8_t *P) { + uintptr_t X = reinterpret_cast<uintptr_t>(P); + size_t Mask = PageSize() - 1; + X = X & ~Mask; + return reinterpret_cast<uint8_t *>(X); +} + +} // namespace fuzzer + +#endif // LLVM_FUZZER_UTIL_H diff --git a/tools/fuzzing/libfuzzer/FuzzerUtilDarwin.cpp b/tools/fuzzing/libfuzzer/FuzzerUtilDarwin.cpp new file mode 100644 index 0000000000..a5bed658a4 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerUtilDarwin.cpp @@ -0,0 +1,170 @@ +//===- FuzzerUtilDarwin.cpp - Misc utils ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Misc utils for Darwin. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" +#if LIBFUZZER_APPLE +#include "FuzzerCommand.h" +#include "FuzzerIO.h" +#include <mutex> +#include <signal.h> +#include <spawn.h> +#include <stdlib.h> +#include <string.h> +#include <sys/wait.h> +#include <unistd.h> + +// There is no header for this on macOS so declare here +extern "C" char **environ; + +namespace fuzzer { + +static std::mutex SignalMutex; +// Global variables used to keep track of how signal handling should be +// restored. They should **not** be accessed without holding `SignalMutex`. +static int ActiveThreadCount = 0; +static struct sigaction OldSigIntAction; +static struct sigaction OldSigQuitAction; +static sigset_t OldBlockedSignalsSet; + +// This is a reimplementation of Libc's `system()`. On Darwin the Libc +// implementation contains a mutex which prevents it from being used +// concurrently. This implementation **can** be used concurrently. It sets the +// signal handlers when the first thread enters and restores them when the last +// thread finishes execution of the function and ensures this is not racey by +// using a mutex. +int ExecuteCommand(const Command &Cmd) { + std::string CmdLine = Cmd.toString(); + posix_spawnattr_t SpawnAttributes; + if (posix_spawnattr_init(&SpawnAttributes)) + return -1; + // Block and ignore signals of the current process when the first thread + // enters. + { + std::lock_guard<std::mutex> Lock(SignalMutex); + if (ActiveThreadCount == 0) { + static struct sigaction IgnoreSignalAction; + sigset_t BlockedSignalsSet; + memset(&IgnoreSignalAction, 0, sizeof(IgnoreSignalAction)); + IgnoreSignalAction.sa_handler = SIG_IGN; + + if (sigaction(SIGINT, &IgnoreSignalAction, &OldSigIntAction) == -1) { + Printf("Failed to ignore SIGINT\n"); + (void)posix_spawnattr_destroy(&SpawnAttributes); + return -1; + } + if (sigaction(SIGQUIT, &IgnoreSignalAction, &OldSigQuitAction) == -1) { + Printf("Failed to ignore SIGQUIT\n"); + // Try our best to restore the signal handlers. + (void)sigaction(SIGINT, &OldSigIntAction, NULL); + (void)posix_spawnattr_destroy(&SpawnAttributes); + return -1; + } + + (void)sigemptyset(&BlockedSignalsSet); + (void)sigaddset(&BlockedSignalsSet, SIGCHLD); + if (sigprocmask(SIG_BLOCK, &BlockedSignalsSet, &OldBlockedSignalsSet) == + -1) { + Printf("Failed to block SIGCHLD\n"); + // Try our best to restore the signal handlers. + (void)sigaction(SIGQUIT, &OldSigQuitAction, NULL); + (void)sigaction(SIGINT, &OldSigIntAction, NULL); + (void)posix_spawnattr_destroy(&SpawnAttributes); + return -1; + } + } + ++ActiveThreadCount; + } + + // NOTE: Do not introduce any new `return` statements past this + // point. It is important that `ActiveThreadCount` always be decremented + // when leaving this function. + + // Make sure the child process uses the default handlers for the + // following signals rather than inheriting what the parent has. + sigset_t DefaultSigSet; + (void)sigemptyset(&DefaultSigSet); + (void)sigaddset(&DefaultSigSet, SIGQUIT); + (void)sigaddset(&DefaultSigSet, SIGINT); + (void)posix_spawnattr_setsigdefault(&SpawnAttributes, &DefaultSigSet); + // Make sure the child process doesn't block SIGCHLD + (void)posix_spawnattr_setsigmask(&SpawnAttributes, &OldBlockedSignalsSet); + short SpawnFlags = POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK; + (void)posix_spawnattr_setflags(&SpawnAttributes, SpawnFlags); + + pid_t Pid; + char **Environ = environ; // Read from global + const char *CommandCStr = CmdLine.c_str(); + char *const Argv[] = { + strdup("sh"), + strdup("-c"), + strdup(CommandCStr), + NULL + }; + int ErrorCode = 0, ProcessStatus = 0; + // FIXME: We probably shouldn't hardcode the shell path. + ErrorCode = posix_spawn(&Pid, "/bin/sh", NULL, &SpawnAttributes, + Argv, Environ); + (void)posix_spawnattr_destroy(&SpawnAttributes); + if (!ErrorCode) { + pid_t SavedPid = Pid; + do { + // Repeat until call completes uninterrupted. + Pid = waitpid(SavedPid, &ProcessStatus, /*options=*/0); + } while (Pid == -1 && errno == EINTR); + if (Pid == -1) { + // Fail for some other reason. + ProcessStatus = -1; + } + } else if (ErrorCode == ENOMEM || ErrorCode == EAGAIN) { + // Fork failure. + ProcessStatus = -1; + } else { + // Shell execution failure. + ProcessStatus = W_EXITCODE(127, 0); + } + for (unsigned i = 0, n = sizeof(Argv) / sizeof(Argv[0]); i < n; ++i) + free(Argv[i]); + + // Restore the signal handlers of the current process when the last thread + // using this function finishes. + { + std::lock_guard<std::mutex> Lock(SignalMutex); + --ActiveThreadCount; + if (ActiveThreadCount == 0) { + bool FailedRestore = false; + if (sigaction(SIGINT, &OldSigIntAction, NULL) == -1) { + Printf("Failed to restore SIGINT handling\n"); + FailedRestore = true; + } + if (sigaction(SIGQUIT, &OldSigQuitAction, NULL) == -1) { + Printf("Failed to restore SIGQUIT handling\n"); + FailedRestore = true; + } + if (sigprocmask(SIG_BLOCK, &OldBlockedSignalsSet, NULL) == -1) { + Printf("Failed to unblock SIGCHLD\n"); + FailedRestore = true; + } + if (FailedRestore) + ProcessStatus = -1; + } + } + return ProcessStatus; +} + +void DiscardOutput(int Fd) { + FILE* Temp = fopen("/dev/null", "w"); + if (!Temp) + return; + dup2(fileno(Temp), Fd); + fclose(Temp); +} + +} // namespace fuzzer + +#endif // LIBFUZZER_APPLE diff --git a/tools/fuzzing/libfuzzer/FuzzerUtilFuchsia.cpp b/tools/fuzzing/libfuzzer/FuzzerUtilFuchsia.cpp new file mode 100644 index 0000000000..190fb78666 --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerUtilFuchsia.cpp @@ -0,0 +1,565 @@ +//===- FuzzerUtilFuchsia.cpp - Misc utils for Fuchsia. --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Misc utils implementation using Fuchsia/Zircon APIs. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" + +#if LIBFUZZER_FUCHSIA + +#include "FuzzerInternal.h" +#include "FuzzerUtil.h" +#include <cassert> +#include <cerrno> +#include <cinttypes> +#include <cstdint> +#include <fcntl.h> +#include <lib/fdio/fdio.h> +#include <lib/fdio/spawn.h> +#include <string> +#include <sys/select.h> +#include <thread> +#include <unistd.h> +#include <zircon/errors.h> +#include <zircon/process.h> +#include <zircon/sanitizer.h> +#include <zircon/status.h> +#include <zircon/syscalls.h> +#include <zircon/syscalls/debug.h> +#include <zircon/syscalls/exception.h> +#include <zircon/syscalls/object.h> +#include <zircon/types.h> + +#include <vector> + +namespace fuzzer { + +// Given that Fuchsia doesn't have the POSIX signals that libFuzzer was written +// around, the general approach is to spin up dedicated threads to watch for +// each requested condition (alarm, interrupt, crash). Of these, the crash +// handler is the most involved, as it requires resuming the crashed thread in +// order to invoke the sanitizers to get the needed state. + +// Forward declaration of assembly trampoline needed to resume crashed threads. +// This appears to have external linkage to C++, which is why it's not in the +// anonymous namespace. The assembly definition inside MakeTrampoline() +// actually defines the symbol with internal linkage only. +void CrashTrampolineAsm() __asm__("CrashTrampolineAsm"); + +namespace { + +// Helper function to handle Zircon syscall failures. +void ExitOnErr(zx_status_t Status, const char *Syscall) { + if (Status != ZX_OK) { + Printf("libFuzzer: %s failed: %s\n", Syscall, + _zx_status_get_string(Status)); + exit(1); + } +} + +void AlarmHandler(int Seconds) { + while (true) { + SleepSeconds(Seconds); + Fuzzer::StaticAlarmCallback(); + } +} + +void InterruptHandler() { + fd_set readfds; + // Ctrl-C sends ETX in Zircon. + do { + FD_ZERO(&readfds); + FD_SET(STDIN_FILENO, &readfds); + select(STDIN_FILENO + 1, &readfds, nullptr, nullptr, nullptr); + } while(!FD_ISSET(STDIN_FILENO, &readfds) || getchar() != 0x03); + Fuzzer::StaticInterruptCallback(); +} + +// CFAOffset is used to reference the stack pointer before entering the +// trampoline (Stack Pointer + CFAOffset = prev Stack Pointer). Before jumping +// to the trampoline we copy all the registers onto the stack. We need to make +// sure that the new stack has enough space to store all the registers. +// +// The trampoline holds CFI information regarding the registers stored in the +// stack, which is then used by the unwinder to restore them. +#if defined(__x86_64__) +// In x86_64 the crashing function might also be using the red zone (128 bytes +// on top of their rsp). +constexpr size_t CFAOffset = 128 + sizeof(zx_thread_state_general_regs_t); +#elif defined(__aarch64__) +// In aarch64 we need to always have the stack pointer aligned to 16 bytes, so we +// make sure that we are keeping that same alignment. +constexpr size_t CFAOffset = (sizeof(zx_thread_state_general_regs_t) + 15) & -(uintptr_t)16; +#endif + +// For the crash handler, we need to call Fuzzer::StaticCrashSignalCallback +// without POSIX signal handlers. To achieve this, we use an assembly function +// to add the necessary CFI unwinding information and a C function to bridge +// from that back into C++. + +// FIXME: This works as a short-term solution, but this code really shouldn't be +// architecture dependent. A better long term solution is to implement remote +// unwinding and expose the necessary APIs through sanitizer_common and/or ASAN +// to allow the exception handling thread to gather the crash state directly. +// +// Alternatively, Fuchsia may in future actually implement basic signal +// handling for the machine trap signals. +#if defined(__x86_64__) +#define FOREACH_REGISTER(OP_REG, OP_NUM) \ + OP_REG(rax) \ + OP_REG(rbx) \ + OP_REG(rcx) \ + OP_REG(rdx) \ + OP_REG(rsi) \ + OP_REG(rdi) \ + OP_REG(rbp) \ + OP_REG(rsp) \ + OP_REG(r8) \ + OP_REG(r9) \ + OP_REG(r10) \ + OP_REG(r11) \ + OP_REG(r12) \ + OP_REG(r13) \ + OP_REG(r14) \ + OP_REG(r15) \ + OP_REG(rip) + +#elif defined(__aarch64__) +#define FOREACH_REGISTER(OP_REG, OP_NUM) \ + OP_NUM(0) \ + OP_NUM(1) \ + OP_NUM(2) \ + OP_NUM(3) \ + OP_NUM(4) \ + OP_NUM(5) \ + OP_NUM(6) \ + OP_NUM(7) \ + OP_NUM(8) \ + OP_NUM(9) \ + OP_NUM(10) \ + OP_NUM(11) \ + OP_NUM(12) \ + OP_NUM(13) \ + OP_NUM(14) \ + OP_NUM(15) \ + OP_NUM(16) \ + OP_NUM(17) \ + OP_NUM(18) \ + OP_NUM(19) \ + OP_NUM(20) \ + OP_NUM(21) \ + OP_NUM(22) \ + OP_NUM(23) \ + OP_NUM(24) \ + OP_NUM(25) \ + OP_NUM(26) \ + OP_NUM(27) \ + OP_NUM(28) \ + OP_NUM(29) \ + OP_REG(sp) + +#else +#error "Unsupported architecture for fuzzing on Fuchsia" +#endif + +// Produces a CFI directive for the named or numbered register. +// The value used refers to an assembler immediate operand with the same name +// as the register (see ASM_OPERAND_REG). +#define CFI_OFFSET_REG(reg) ".cfi_offset " #reg ", %c[" #reg "]\n" +#define CFI_OFFSET_NUM(num) CFI_OFFSET_REG(x##num) + +// Produces an assembler immediate operand for the named or numbered register. +// This operand contains the offset of the register relative to the CFA. +#define ASM_OPERAND_REG(reg) \ + [reg] "i"(offsetof(zx_thread_state_general_regs_t, reg) - CFAOffset), +#define ASM_OPERAND_NUM(num) \ + [x##num] "i"(offsetof(zx_thread_state_general_regs_t, r[num]) - CFAOffset), + +// Trampoline to bridge from the assembly below to the static C++ crash +// callback. +__attribute__((noreturn)) +static void StaticCrashHandler() { + Fuzzer::StaticCrashSignalCallback(); + for (;;) { + _Exit(1); + } +} + +// Creates the trampoline with the necessary CFI information to unwind through +// to the crashing call stack: +// * Defining the CFA so that it points to the stack pointer at the point +// of crash. +// * Storing all registers at the point of crash in the stack and refer to them +// via CFI information (relative to the CFA). +// * Setting the return column so the unwinder knows how to continue unwinding. +// * (x86_64) making sure rsp is aligned before calling StaticCrashHandler. +// * Calling StaticCrashHandler that will trigger the unwinder. +// +// The __attribute__((used)) is necessary because the function +// is never called; it's just a container around the assembly to allow it to +// use operands for compile-time computed constants. +__attribute__((used)) +void MakeTrampoline() { + __asm__(".cfi_endproc\n" + ".pushsection .text.CrashTrampolineAsm\n" + ".type CrashTrampolineAsm,STT_FUNC\n" +"CrashTrampolineAsm:\n" + ".cfi_startproc simple\n" + ".cfi_signal_frame\n" +#if defined(__x86_64__) + ".cfi_return_column rip\n" + ".cfi_def_cfa rsp, %c[CFAOffset]\n" + FOREACH_REGISTER(CFI_OFFSET_REG, CFI_OFFSET_NUM) + "mov %%rsp, %%rbp\n" + ".cfi_def_cfa_register rbp\n" + "andq $-16, %%rsp\n" + "call %c[StaticCrashHandler]\n" + "ud2\n" +#elif defined(__aarch64__) + ".cfi_return_column 33\n" + ".cfi_def_cfa sp, %c[CFAOffset]\n" + FOREACH_REGISTER(CFI_OFFSET_REG, CFI_OFFSET_NUM) + ".cfi_offset 33, %c[pc]\n" + ".cfi_offset 30, %c[lr]\n" + "bl %c[StaticCrashHandler]\n" + "brk 1\n" +#else +#error "Unsupported architecture for fuzzing on Fuchsia" +#endif + ".cfi_endproc\n" + ".size CrashTrampolineAsm, . - CrashTrampolineAsm\n" + ".popsection\n" + ".cfi_startproc\n" + : // No outputs + : FOREACH_REGISTER(ASM_OPERAND_REG, ASM_OPERAND_NUM) +#if defined(__aarch64__) + ASM_OPERAND_REG(pc) + ASM_OPERAND_REG(lr) +#endif + [StaticCrashHandler] "i" (StaticCrashHandler), + [CFAOffset] "i" (CFAOffset)); +} + +void CrashHandler(zx_handle_t *Event) { + // This structure is used to ensure we close handles to objects we create in + // this handler. + struct ScopedHandle { + ~ScopedHandle() { _zx_handle_close(Handle); } + zx_handle_t Handle = ZX_HANDLE_INVALID; + }; + + // Create the exception channel. We need to claim to be a "debugger" so the + // kernel will allow us to modify and resume dying threads (see below). Once + // the channel is set, we can signal the main thread to continue and wait + // for the exception to arrive. + ScopedHandle Channel; + zx_handle_t Self = _zx_process_self(); + ExitOnErr(_zx_task_create_exception_channel( + Self, ZX_EXCEPTION_CHANNEL_DEBUGGER, &Channel.Handle), + "_zx_task_create_exception_channel"); + + ExitOnErr(_zx_object_signal(*Event, 0, ZX_USER_SIGNAL_0), + "_zx_object_signal"); + + // This thread lives as long as the process in order to keep handling + // crashes. In practice, the first crashed thread to reach the end of the + // StaticCrashHandler will end the process. + while (true) { + ExitOnErr(_zx_object_wait_one(Channel.Handle, ZX_CHANNEL_READABLE, + ZX_TIME_INFINITE, nullptr), + "_zx_object_wait_one"); + + zx_exception_info_t ExceptionInfo; + ScopedHandle Exception; + ExitOnErr(_zx_channel_read(Channel.Handle, 0, &ExceptionInfo, + &Exception.Handle, sizeof(ExceptionInfo), 1, + nullptr, nullptr), + "_zx_channel_read"); + + // Ignore informational synthetic exceptions. + if (ZX_EXCP_THREAD_STARTING == ExceptionInfo.type || + ZX_EXCP_THREAD_EXITING == ExceptionInfo.type || + ZX_EXCP_PROCESS_STARTING == ExceptionInfo.type) { + continue; + } + + // At this point, we want to get the state of the crashing thread, but + // libFuzzer and the sanitizers assume this will happen from that same + // thread via a POSIX signal handler. "Resurrecting" the thread in the + // middle of the appropriate callback is as simple as forcibly setting the + // instruction pointer/program counter, provided we NEVER EVER return from + // that function (since otherwise our stack will not be valid). + ScopedHandle Thread; + ExitOnErr(_zx_exception_get_thread(Exception.Handle, &Thread.Handle), + "_zx_exception_get_thread"); + + zx_thread_state_general_regs_t GeneralRegisters; + ExitOnErr(_zx_thread_read_state(Thread.Handle, ZX_THREAD_STATE_GENERAL_REGS, + &GeneralRegisters, + sizeof(GeneralRegisters)), + "_zx_thread_read_state"); + + // To unwind properly, we need to push the crashing thread's register state + // onto the stack and jump into a trampoline with CFI instructions on how + // to restore it. +#if defined(__x86_64__) + uintptr_t StackPtr = GeneralRegisters.rsp - CFAOffset; + __unsanitized_memcpy(reinterpret_cast<void *>(StackPtr), &GeneralRegisters, + sizeof(GeneralRegisters)); + GeneralRegisters.rsp = StackPtr; + GeneralRegisters.rip = reinterpret_cast<zx_vaddr_t>(CrashTrampolineAsm); + +#elif defined(__aarch64__) + uintptr_t StackPtr = GeneralRegisters.sp - CFAOffset; + __unsanitized_memcpy(reinterpret_cast<void *>(StackPtr), &GeneralRegisters, + sizeof(GeneralRegisters)); + GeneralRegisters.sp = StackPtr; + GeneralRegisters.pc = reinterpret_cast<zx_vaddr_t>(CrashTrampolineAsm); + +#else +#error "Unsupported architecture for fuzzing on Fuchsia" +#endif + + // Now force the crashing thread's state. + ExitOnErr( + _zx_thread_write_state(Thread.Handle, ZX_THREAD_STATE_GENERAL_REGS, + &GeneralRegisters, sizeof(GeneralRegisters)), + "_zx_thread_write_state"); + + // Set the exception to HANDLED so it resumes the thread on close. + uint32_t ExceptionState = ZX_EXCEPTION_STATE_HANDLED; + ExitOnErr(_zx_object_set_property(Exception.Handle, ZX_PROP_EXCEPTION_STATE, + &ExceptionState, sizeof(ExceptionState)), + "zx_object_set_property"); + } +} + +} // namespace + +// Platform specific functions. +void SetSignalHandler(const FuzzingOptions &Options) { + // Make sure information from libFuzzer and the sanitizers are easy to + // reassemble. `__sanitizer_log_write` has the added benefit of ensuring the + // DSO map is always available for the symbolizer. + // A uint64_t fits in 20 chars, so 64 is plenty. + char Buf[64]; + memset(Buf, 0, sizeof(Buf)); + snprintf(Buf, sizeof(Buf), "==%lu== INFO: libFuzzer starting.\n", GetPid()); + if (EF->__sanitizer_log_write) + __sanitizer_log_write(Buf, sizeof(Buf)); + Printf("%s", Buf); + + // Set up alarm handler if needed. + if (Options.UnitTimeoutSec > 0) { + std::thread T(AlarmHandler, Options.UnitTimeoutSec / 2 + 1); + T.detach(); + } + + // Set up interrupt handler if needed. + if (Options.HandleInt || Options.HandleTerm) { + std::thread T(InterruptHandler); + T.detach(); + } + + // Early exit if no crash handler needed. + if (!Options.HandleSegv && !Options.HandleBus && !Options.HandleIll && + !Options.HandleFpe && !Options.HandleAbrt) + return; + + // Set up the crash handler and wait until it is ready before proceeding. + zx_handle_t Event; + ExitOnErr(_zx_event_create(0, &Event), "_zx_event_create"); + + std::thread T(CrashHandler, &Event); + zx_status_t Status = + _zx_object_wait_one(Event, ZX_USER_SIGNAL_0, ZX_TIME_INFINITE, nullptr); + _zx_handle_close(Event); + ExitOnErr(Status, "_zx_object_wait_one"); + + T.detach(); +} + +void SleepSeconds(int Seconds) { + _zx_nanosleep(_zx_deadline_after(ZX_SEC(Seconds))); +} + +unsigned long GetPid() { + zx_status_t rc; + zx_info_handle_basic_t Info; + if ((rc = _zx_object_get_info(_zx_process_self(), ZX_INFO_HANDLE_BASIC, &Info, + sizeof(Info), NULL, NULL)) != ZX_OK) { + Printf("libFuzzer: unable to get info about self: %s\n", + _zx_status_get_string(rc)); + exit(1); + } + return Info.koid; +} + +size_t GetPeakRSSMb() { + zx_status_t rc; + zx_info_task_stats_t Info; + if ((rc = _zx_object_get_info(_zx_process_self(), ZX_INFO_TASK_STATS, &Info, + sizeof(Info), NULL, NULL)) != ZX_OK) { + Printf("libFuzzer: unable to get info about self: %s\n", + _zx_status_get_string(rc)); + exit(1); + } + return (Info.mem_private_bytes + Info.mem_shared_bytes) >> 20; +} + +template <typename Fn> +class RunOnDestruction { + public: + explicit RunOnDestruction(Fn fn) : fn_(fn) {} + ~RunOnDestruction() { fn_(); } + + private: + Fn fn_; +}; + +template <typename Fn> +RunOnDestruction<Fn> at_scope_exit(Fn fn) { + return RunOnDestruction<Fn>(fn); +} + +static fdio_spawn_action_t clone_fd_action(int localFd, int targetFd) { + return { + .action = FDIO_SPAWN_ACTION_CLONE_FD, + .fd = + { + .local_fd = localFd, + .target_fd = targetFd, + }, + }; +} + +int ExecuteCommand(const Command &Cmd) { + zx_status_t rc; + + // Convert arguments to C array + auto Args = Cmd.getArguments(); + size_t Argc = Args.size(); + assert(Argc != 0); + std::unique_ptr<const char *[]> Argv(new const char *[Argc + 1]); + for (size_t i = 0; i < Argc; ++i) + Argv[i] = Args[i].c_str(); + Argv[Argc] = nullptr; + + // Determine output. On Fuchsia, the fuzzer is typically run as a component + // that lacks a mutable working directory. Fortunately, when this is the case + // a mutable output directory must be specified using "-artifact_prefix=...", + // so write the log file(s) there. + // However, we don't want to apply this logic for absolute paths. + int FdOut = STDOUT_FILENO; + bool discardStdout = false; + bool discardStderr = false; + + if (Cmd.hasOutputFile()) { + std::string Path = Cmd.getOutputFile(); + if (Path == getDevNull()) { + // On Fuchsia, there's no "/dev/null" like-file, so we + // just don't copy the FDs into the spawned process. + discardStdout = true; + } else { + bool IsAbsolutePath = Path.length() > 1 && Path[0] == '/'; + if (!IsAbsolutePath && Cmd.hasFlag("artifact_prefix")) + Path = Cmd.getFlagValue("artifact_prefix") + "/" + Path; + + FdOut = open(Path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0); + if (FdOut == -1) { + Printf("libFuzzer: failed to open %s: %s\n", Path.c_str(), + strerror(errno)); + return ZX_ERR_IO; + } + } + } + auto CloseFdOut = at_scope_exit([FdOut]() { + if (FdOut != STDOUT_FILENO) + close(FdOut); + }); + + // Determine stderr + int FdErr = STDERR_FILENO; + if (Cmd.isOutAndErrCombined()) { + FdErr = FdOut; + if (discardStdout) + discardStderr = true; + } + + // Clone the file descriptors into the new process + std::vector<fdio_spawn_action_t> SpawnActions; + SpawnActions.push_back(clone_fd_action(STDIN_FILENO, STDIN_FILENO)); + + if (!discardStdout) + SpawnActions.push_back(clone_fd_action(FdOut, STDOUT_FILENO)); + if (!discardStderr) + SpawnActions.push_back(clone_fd_action(FdErr, STDERR_FILENO)); + + // Start the process. + char ErrorMsg[FDIO_SPAWN_ERR_MSG_MAX_LENGTH]; + zx_handle_t ProcessHandle = ZX_HANDLE_INVALID; + rc = fdio_spawn_etc(ZX_HANDLE_INVALID, + FDIO_SPAWN_CLONE_ALL & (~FDIO_SPAWN_CLONE_STDIO), Argv[0], + Argv.get(), nullptr, SpawnActions.size(), + SpawnActions.data(), &ProcessHandle, ErrorMsg); + + if (rc != ZX_OK) { + Printf("libFuzzer: failed to launch '%s': %s, %s\n", Argv[0], ErrorMsg, + _zx_status_get_string(rc)); + return rc; + } + auto CloseHandle = at_scope_exit([&]() { _zx_handle_close(ProcessHandle); }); + + // Now join the process and return the exit status. + if ((rc = _zx_object_wait_one(ProcessHandle, ZX_PROCESS_TERMINATED, + ZX_TIME_INFINITE, nullptr)) != ZX_OK) { + Printf("libFuzzer: failed to join '%s': %s\n", Argv[0], + _zx_status_get_string(rc)); + return rc; + } + + zx_info_process_t Info; + if ((rc = _zx_object_get_info(ProcessHandle, ZX_INFO_PROCESS, &Info, + sizeof(Info), nullptr, nullptr)) != ZX_OK) { + Printf("libFuzzer: unable to get return code from '%s': %s\n", Argv[0], + _zx_status_get_string(rc)); + return rc; + } + + return Info.return_code; +} + +bool ExecuteCommand(const Command &BaseCmd, std::string *CmdOutput) { + auto LogFilePath = TempPath("SimPopenOut", ".txt"); + Command Cmd(BaseCmd); + Cmd.setOutputFile(LogFilePath); + int Ret = ExecuteCommand(Cmd); + *CmdOutput = FileToString(LogFilePath); + RemoveFile(LogFilePath); + return Ret == 0; +} + +const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, + size_t PattLen) { + return memmem(Data, DataLen, Patt, PattLen); +} + +// In fuchsia, accessing /dev/null is not supported. There's nothing +// similar to a file that discards everything that is written to it. +// The way of doing something similar in fuchsia is by using +// fdio_null_create and binding that to a file descriptor. +void DiscardOutput(int Fd) { + fdio_t *fdio_null = fdio_null_create(); + if (fdio_null == nullptr) return; + int nullfd = fdio_bind_to_fd(fdio_null, -1, 0); + if (nullfd < 0) return; + dup2(nullfd, Fd); +} + +} // namespace fuzzer + +#endif // LIBFUZZER_FUCHSIA diff --git a/tools/fuzzing/libfuzzer/FuzzerUtilLinux.cpp b/tools/fuzzing/libfuzzer/FuzzerUtilLinux.cpp new file mode 100644 index 0000000000..95490b992e --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerUtilLinux.cpp @@ -0,0 +1,41 @@ +//===- FuzzerUtilLinux.cpp - Misc utils for Linux. ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Misc utils for Linux. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" +#if LIBFUZZER_LINUX || LIBFUZZER_NETBSD || LIBFUZZER_FREEBSD || \ + LIBFUZZER_OPENBSD || LIBFUZZER_EMSCRIPTEN +#include "FuzzerCommand.h" + +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + + +namespace fuzzer { + +int ExecuteCommand(const Command &Cmd) { + std::string CmdLine = Cmd.toString(); + int exit_code = system(CmdLine.c_str()); + if (WIFEXITED(exit_code)) + return WEXITSTATUS(exit_code); + return exit_code; +} + +void DiscardOutput(int Fd) { + FILE* Temp = fopen("/dev/null", "w"); + if (!Temp) + return; + dup2(fileno(Temp), Fd); + fclose(Temp); +} + +} // namespace fuzzer + +#endif diff --git a/tools/fuzzing/libfuzzer/FuzzerUtilPosix.cpp b/tools/fuzzing/libfuzzer/FuzzerUtilPosix.cpp new file mode 100644 index 0000000000..fc57b724db --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerUtilPosix.cpp @@ -0,0 +1,185 @@ +//===- FuzzerUtilPosix.cpp - Misc utils for Posix. ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Misc utils implementation using Posix API. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" +#if LIBFUZZER_POSIX +#include "FuzzerIO.h" +#include "FuzzerInternal.h" +#include "FuzzerTracePC.h" +#include <cassert> +#include <chrono> +#include <cstring> +#include <errno.h> +#include <iomanip> +#include <signal.h> +#include <stdio.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <thread> +#include <unistd.h> + +namespace fuzzer { + +static void AlarmHandler(int, siginfo_t *, void *) { + Fuzzer::StaticAlarmCallback(); +} + +static void (*upstream_segv_handler)(int, siginfo_t *, void *); + +static void SegvHandler(int sig, siginfo_t *si, void *ucontext) { + assert(si->si_signo == SIGSEGV); + if (upstream_segv_handler) + return upstream_segv_handler(sig, si, ucontext); + Fuzzer::StaticCrashSignalCallback(); +} + +static void CrashHandler(int, siginfo_t *, void *) { + Fuzzer::StaticCrashSignalCallback(); +} + +static void InterruptHandler(int, siginfo_t *, void *) { + Fuzzer::StaticInterruptCallback(); +} + +static void GracefulExitHandler(int, siginfo_t *, void *) { + Fuzzer::StaticGracefulExitCallback(); +} + +static void FileSizeExceedHandler(int, siginfo_t *, void *) { + Fuzzer::StaticFileSizeExceedCallback(); +} + +static void SetSigaction(int signum, + void (*callback)(int, siginfo_t *, void *)) { + struct sigaction sigact = {}; + if (sigaction(signum, nullptr, &sigact)) { + Printf("libFuzzer: sigaction failed with %d\n", errno); + exit(1); + } + if (sigact.sa_flags & SA_SIGINFO) { + if (sigact.sa_sigaction) { + if (signum != SIGSEGV) + return; + upstream_segv_handler = sigact.sa_sigaction; + } + } else { + if (sigact.sa_handler != SIG_DFL && sigact.sa_handler != SIG_IGN && + sigact.sa_handler != SIG_ERR) + return; + } + + sigact = {}; + sigact.sa_flags = SA_SIGINFO; + sigact.sa_sigaction = callback; + if (sigaction(signum, &sigact, 0)) { + Printf("libFuzzer: sigaction failed with %d\n", errno); + exit(1); + } +} + +// Return true on success, false otherwise. +bool ExecuteCommand(const Command &Cmd, std::string *CmdOutput) { + FILE *Pipe = popen(Cmd.toString().c_str(), "r"); + if (!Pipe) + return false; + + if (CmdOutput) { + char TmpBuffer[128]; + while (fgets(TmpBuffer, sizeof(TmpBuffer), Pipe)) + CmdOutput->append(TmpBuffer); + } + return pclose(Pipe) == 0; +} + +void SetTimer(int Seconds) { + struct itimerval T { + {Seconds, 0}, { Seconds, 0 } + }; + if (setitimer(ITIMER_REAL, &T, nullptr)) { + Printf("libFuzzer: setitimer failed with %d\n", errno); + exit(1); + } + SetSigaction(SIGALRM, AlarmHandler); +} + +void SetSignalHandler(const FuzzingOptions& Options) { + // setitimer is not implemented in emscripten. + if (Options.UnitTimeoutSec > 0 && !LIBFUZZER_EMSCRIPTEN) + SetTimer(Options.UnitTimeoutSec / 2 + 1); + if (Options.HandleInt) + SetSigaction(SIGINT, InterruptHandler); + if (Options.HandleTerm) + SetSigaction(SIGTERM, InterruptHandler); + if (Options.HandleSegv) + SetSigaction(SIGSEGV, SegvHandler); + if (Options.HandleBus) + SetSigaction(SIGBUS, CrashHandler); + if (Options.HandleAbrt) + SetSigaction(SIGABRT, CrashHandler); + if (Options.HandleIll) + SetSigaction(SIGILL, CrashHandler); + if (Options.HandleFpe) + SetSigaction(SIGFPE, CrashHandler); + if (Options.HandleXfsz) + SetSigaction(SIGXFSZ, FileSizeExceedHandler); + if (Options.HandleUsr1) + SetSigaction(SIGUSR1, GracefulExitHandler); + if (Options.HandleUsr2) + SetSigaction(SIGUSR2, GracefulExitHandler); +} + +void SleepSeconds(int Seconds) { + sleep(Seconds); // Use C API to avoid coverage from instrumented libc++. +} + +unsigned long GetPid() { return (unsigned long)getpid(); } + +size_t GetPeakRSSMb() { + struct rusage usage; + if (getrusage(RUSAGE_SELF, &usage)) + return 0; + if (LIBFUZZER_LINUX || LIBFUZZER_FREEBSD || LIBFUZZER_NETBSD || + LIBFUZZER_OPENBSD || LIBFUZZER_EMSCRIPTEN) { + // ru_maxrss is in KiB + return usage.ru_maxrss >> 10; + } else if (LIBFUZZER_APPLE) { + // ru_maxrss is in bytes + return usage.ru_maxrss >> 20; + } + assert(0 && "GetPeakRSSMb() is not implemented for your platform"); + return 0; +} + +FILE *OpenProcessPipe(const char *Command, const char *Mode) { + return popen(Command, Mode); +} + +int CloseProcessPipe(FILE *F) { + return pclose(F); +} + +const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, + size_t PattLen) { + return memmem(Data, DataLen, Patt, PattLen); +} + +std::string DisassembleCmd(const std::string &FileName) { + return "objdump -d " + FileName; +} + +std::string SearchRegexCmd(const std::string &Regex) { + return "grep '" + Regex + "'"; +} + +} // namespace fuzzer + +#endif // LIBFUZZER_POSIX diff --git a/tools/fuzzing/libfuzzer/FuzzerUtilWindows.cpp b/tools/fuzzing/libfuzzer/FuzzerUtilWindows.cpp new file mode 100644 index 0000000000..6c693e3d7e --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerUtilWindows.cpp @@ -0,0 +1,221 @@ +//===- FuzzerUtilWindows.cpp - Misc utils for Windows. --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Misc utils implementation for Windows. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" +#if LIBFUZZER_WINDOWS +#include "FuzzerCommand.h" +#include "FuzzerIO.h" +#include "FuzzerInternal.h" +#include <cassert> +#include <chrono> +#include <cstring> +#include <errno.h> +#include <io.h> +#include <iomanip> +#include <signal.h> +#include <stdio.h> +#include <sys/types.h> +#include <windows.h> + +// This must be included after windows.h. +#include <psapi.h> + +namespace fuzzer { + +static const FuzzingOptions* HandlerOpt = nullptr; + +static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) { + switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { + case EXCEPTION_ACCESS_VIOLATION: + case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: + case EXCEPTION_STACK_OVERFLOW: + if (HandlerOpt->HandleSegv) + Fuzzer::StaticCrashSignalCallback(); + break; + case EXCEPTION_DATATYPE_MISALIGNMENT: + case EXCEPTION_IN_PAGE_ERROR: + if (HandlerOpt->HandleBus) + Fuzzer::StaticCrashSignalCallback(); + break; + case EXCEPTION_ILLEGAL_INSTRUCTION: + case EXCEPTION_PRIV_INSTRUCTION: + if (HandlerOpt->HandleIll) + Fuzzer::StaticCrashSignalCallback(); + break; + case EXCEPTION_FLT_DENORMAL_OPERAND: + case EXCEPTION_FLT_DIVIDE_BY_ZERO: + case EXCEPTION_FLT_INEXACT_RESULT: + case EXCEPTION_FLT_INVALID_OPERATION: + case EXCEPTION_FLT_OVERFLOW: + case EXCEPTION_FLT_STACK_CHECK: + case EXCEPTION_FLT_UNDERFLOW: + case EXCEPTION_INT_DIVIDE_BY_ZERO: + case EXCEPTION_INT_OVERFLOW: + if (HandlerOpt->HandleFpe) + Fuzzer::StaticCrashSignalCallback(); + break; + // TODO: handle (Options.HandleXfsz) + } + return EXCEPTION_CONTINUE_SEARCH; +} + +BOOL WINAPI CtrlHandler(DWORD dwCtrlType) { + switch (dwCtrlType) { + case CTRL_C_EVENT: + if (HandlerOpt->HandleInt) + Fuzzer::StaticInterruptCallback(); + return TRUE; + case CTRL_BREAK_EVENT: + if (HandlerOpt->HandleTerm) + Fuzzer::StaticInterruptCallback(); + return TRUE; + } + return FALSE; +} + +void CALLBACK AlarmHandler(PVOID, BOOLEAN) { + Fuzzer::StaticAlarmCallback(); +} + +class TimerQ { + HANDLE TimerQueue; + public: + TimerQ() : TimerQueue(NULL) {} + ~TimerQ() { + if (TimerQueue) + DeleteTimerQueueEx(TimerQueue, NULL); + } + void SetTimer(int Seconds) { + if (!TimerQueue) { + TimerQueue = CreateTimerQueue(); + if (!TimerQueue) { + Printf("libFuzzer: CreateTimerQueue failed.\n"); + exit(1); + } + } + HANDLE Timer; + if (!CreateTimerQueueTimer(&Timer, TimerQueue, AlarmHandler, NULL, + Seconds*1000, Seconds*1000, 0)) { + Printf("libFuzzer: CreateTimerQueueTimer failed.\n"); + exit(1); + } + } +}; + +static TimerQ Timer; + +static void CrashHandler(int) { Fuzzer::StaticCrashSignalCallback(); } + +void SetSignalHandler(const FuzzingOptions& Options) { + HandlerOpt = &Options; + + if (Options.UnitTimeoutSec > 0) + Timer.SetTimer(Options.UnitTimeoutSec / 2 + 1); + + if (Options.HandleInt || Options.HandleTerm) + if (!SetConsoleCtrlHandler(CtrlHandler, TRUE)) { + DWORD LastError = GetLastError(); + Printf("libFuzzer: SetConsoleCtrlHandler failed (Error code: %lu).\n", + LastError); + exit(1); + } + + if (Options.HandleSegv || Options.HandleBus || Options.HandleIll || + Options.HandleFpe) + SetUnhandledExceptionFilter(ExceptionHandler); + + if (Options.HandleAbrt) + if (SIG_ERR == signal(SIGABRT, CrashHandler)) { + Printf("libFuzzer: signal failed with %d\n", errno); + exit(1); + } +} + +void SleepSeconds(int Seconds) { Sleep(Seconds * 1000); } + +unsigned long GetPid() { return GetCurrentProcessId(); } + +size_t GetPeakRSSMb() { + PROCESS_MEMORY_COUNTERS info; + if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info))) + return 0; + return info.PeakWorkingSetSize >> 20; +} + +FILE *OpenProcessPipe(const char *Command, const char *Mode) { + return _popen(Command, Mode); +} + +int CloseProcessPipe(FILE *F) { + return _pclose(F); +} + +int ExecuteCommand(const Command &Cmd) { + std::string CmdLine = Cmd.toString(); + return system(CmdLine.c_str()); +} + +bool ExecuteCommand(const Command &Cmd, std::string *CmdOutput) { + FILE *Pipe = _popen(Cmd.toString().c_str(), "r"); + if (!Pipe) + return false; + + if (CmdOutput) { + char TmpBuffer[128]; + while (fgets(TmpBuffer, sizeof(TmpBuffer), Pipe)) + CmdOutput->append(TmpBuffer); + } + return _pclose(Pipe) == 0; +} + +const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, + size_t PattLen) { + // TODO: make this implementation more efficient. + const char *Cdata = (const char *)Data; + const char *Cpatt = (const char *)Patt; + + if (!Data || !Patt || DataLen == 0 || PattLen == 0 || DataLen < PattLen) + return NULL; + + if (PattLen == 1) + return memchr(Data, *Cpatt, DataLen); + + const char *End = Cdata + DataLen - PattLen + 1; + + for (const char *It = Cdata; It < End; ++It) + if (It[0] == Cpatt[0] && memcmp(It, Cpatt, PattLen) == 0) + return It; + + return NULL; +} + +std::string DisassembleCmd(const std::string &FileName) { + Vector<std::string> command_vector; + command_vector.push_back("dumpbin /summary > nul"); + if (ExecuteCommand(Command(command_vector)) == 0) + return "dumpbin /disasm " + FileName; + Printf("libFuzzer: couldn't find tool to disassemble (dumpbin)\n"); + exit(1); +} + +std::string SearchRegexCmd(const std::string &Regex) { + return "findstr /r \"" + Regex + "\""; +} + +void DiscardOutput(int Fd) { + FILE* Temp = fopen("nul", "w"); + if (!Temp) + return; + _dup2(_fileno(Temp), Fd); + fclose(Temp); +} + +} // namespace fuzzer + +#endif // LIBFUZZER_WINDOWS diff --git a/tools/fuzzing/libfuzzer/FuzzerValueBitMap.h b/tools/fuzzing/libfuzzer/FuzzerValueBitMap.h new file mode 100644 index 0000000000..ddbfe200af --- /dev/null +++ b/tools/fuzzing/libfuzzer/FuzzerValueBitMap.h @@ -0,0 +1,73 @@ +//===- FuzzerValueBitMap.h - INTERNAL - Bit map -----------------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// ValueBitMap. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_VALUE_BIT_MAP_H +#define LLVM_FUZZER_VALUE_BIT_MAP_H + +#include "FuzzerPlatform.h" +#include <cstdint> + +namespace fuzzer { + +// A bit map containing kMapSizeInWords bits. +struct ValueBitMap { + static const size_t kMapSizeInBits = 1 << 16; + static const size_t kMapPrimeMod = 65371; // Largest Prime < kMapSizeInBits; + static const size_t kBitsInWord = (sizeof(uintptr_t) * 8); + static const size_t kMapSizeInWords = kMapSizeInBits / kBitsInWord; + public: + + // Clears all bits. + void Reset() { memset(Map, 0, sizeof(Map)); } + + // Computes a hash function of Value and sets the corresponding bit. + // Returns true if the bit was changed from 0 to 1. + ATTRIBUTE_NO_SANITIZE_ALL + inline bool AddValue(uintptr_t Value) { + uintptr_t Idx = Value % kMapSizeInBits; + uintptr_t WordIdx = Idx / kBitsInWord; + uintptr_t BitIdx = Idx % kBitsInWord; + uintptr_t Old = Map[WordIdx]; + uintptr_t New = Old | (1ULL << BitIdx); + Map[WordIdx] = New; + return New != Old; + } + + ATTRIBUTE_NO_SANITIZE_ALL + inline bool AddValueModPrime(uintptr_t Value) { + return AddValue(Value % kMapPrimeMod); + } + + inline bool Get(uintptr_t Idx) { + assert(Idx < kMapSizeInBits); + uintptr_t WordIdx = Idx / kBitsInWord; + uintptr_t BitIdx = Idx % kBitsInWord; + return Map[WordIdx] & (1ULL << BitIdx); + } + + size_t SizeInBits() const { return kMapSizeInBits; } + + template <class Callback> + ATTRIBUTE_NO_SANITIZE_ALL + void ForEach(Callback CB) const { + for (size_t i = 0; i < kMapSizeInWords; i++) + if (uintptr_t M = Map[i]) + for (size_t j = 0; j < sizeof(M) * 8; j++) + if (M & ((uintptr_t)1 << j)) + CB(i * sizeof(M) * 8 + j); + } + + private: + ATTRIBUTE_ALIGNED(512) uintptr_t Map[kMapSizeInWords]; +}; + +} // namespace fuzzer + +#endif // LLVM_FUZZER_VALUE_BIT_MAP_H diff --git a/tools/fuzzing/libfuzzer/LICENSE.TXT b/tools/fuzzing/libfuzzer/LICENSE.TXT new file mode 100644 index 0000000000..555c8bb952 --- /dev/null +++ b/tools/fuzzing/libfuzzer/LICENSE.TXT @@ -0,0 +1,68 @@ +============================================================================== +LLVM Release License +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2016 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== +Copyrights and Licenses for Third Party Software Distributed with LLVM: +============================================================================== +The LLVM software contains code written by third parties. Such software will +have its own individual LICENSE.TXT file in the directory in which it appears. +This file will describe the copyrights, license, and restrictions which apply +to that code. + +The disclaimer of warranty in the University of Illinois Open Source License +applies to all code in the LLVM Distribution, and nothing in any of the +other licenses gives permission to use the names of the LLVM Team or the +University of Illinois to endorse or promote products derived from this +Software. + +The following pieces of software have additional or alternate copyrights, +licenses, and/or restrictions: + +Program Directory +------- --------- +Google Test llvm/utils/unittest/googletest +OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} +pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT} +ARM contributions llvm/lib/Target/ARM/LICENSE.TXT +md5 contributions llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h diff --git a/tools/fuzzing/libfuzzer/clone_libfuzzer.sh b/tools/fuzzing/libfuzzer/clone_libfuzzer.sh new file mode 100755 index 0000000000..ca01df2978 --- /dev/null +++ b/tools/fuzzing/libfuzzer/clone_libfuzzer.sh @@ -0,0 +1,36 @@ +#!/bin/bash -e + +# Optionally get revision from cmd line +[ $1 ] && REVISION=$1 || REVISION=76d07503f0c69f6632e6d8d4736e2a4cb4055a92 + +mkdir tmp +git clone --single-branch --no-checkout --shallow-since "2020-07-01" https://github.com/llvm/llvm-project tmp + +(cd tmp && git reset --hard $REVISION) + +# libFuzzer source files +CPPS=($(ls tmp/compiler-rt/lib/fuzzer/*.cpp | sort -r)) +CPPS=(${CPPS[@]##*/}) +CPPS=(${CPPS[@]##FuzzerMain*}) # ignored +CPPS=(${CPPS[@]##FuzzerInterceptors*}) # ignored + +# Update SOURCES entries +sed -e "/^SOURCES/,/^]/ {/'/d}" -i moz.build +for CPP in ${CPPS[@]}; do sed -e "/^SOURCES/ a \\ '${CPP}'," -i moz.build; done + +# Remove previous files +rm *.{cpp,h,def} + +# Copy files +cp tmp/compiler-rt/lib/fuzzer/*.{cpp,h,def} . + +# Apply local patches +for patch in patches/*.patch +do + patch -p4 < $patch +done + +# Remove the temporary directory +rm -Rf tmp/ + +echo "Updated libFuzzer to ${REVISION}" diff --git a/tools/fuzzing/libfuzzer/moz.build b/tools/fuzzing/libfuzzer/moz.build new file mode 100644 index 0000000000..a04169df57 --- /dev/null +++ b/tools/fuzzing/libfuzzer/moz.build @@ -0,0 +1,55 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Library('fuzzer') + +EXPORTS += [ + 'FuzzerDefs.h', + 'FuzzerExtFunctions.def', + 'FuzzerExtFunctions.h', +] + +SOURCES += [ + 'FuzzerCrossOver.cpp', + 'FuzzerDataFlowTrace.cpp', + 'FuzzerDriver.cpp', + 'FuzzerExtFunctionsDlsym.cpp', + 'FuzzerExtFunctionsWeak.cpp', + 'FuzzerExtFunctionsWindows.cpp', + 'FuzzerExtraCounters.cpp', + 'FuzzerFork.cpp', + 'FuzzerIO.cpp', + 'FuzzerIOPosix.cpp', + 'FuzzerIOWindows.cpp', + 'FuzzerLoop.cpp', + 'FuzzerMerge.cpp', + 'FuzzerMutate.cpp', + 'FuzzerSHA1.cpp', + 'FuzzerTracePC.cpp', + 'FuzzerUtil.cpp', + 'FuzzerUtilDarwin.cpp', + 'FuzzerUtilFuchsia.cpp', + 'FuzzerUtilLinux.cpp', + 'FuzzerUtilPosix.cpp', + 'FuzzerUtilWindows.cpp', +] + +if CONFIG['CC_TYPE'] == 'clang': + CXXFLAGS += ['-Wno-unreachable-code-return'] + +# According to the LLVM docs, LibFuzzer isn't supposed to be built with any +# sanitizer flags and in fact, building it with ASan coverage currently causes +# Clang 3.9+ to crash, so we filter out all sanitizer-related flags here. +for flags_var in ('OS_CFLAGS', 'OS_CXXFLAGS'): + COMPILE_FLAGS[flags_var] = [ + f for f in COMPILE_FLAGS.get(flags_var, []) + if not f.startswith('-fsanitize') + ] + +LINK_FLAGS['OS'] = [ + f for f in LINK_FLAGS.get('OS', []) + if not f.startswith('-fsanitize') +] diff --git a/tools/fuzzing/libfuzzer/patches/10-ef-runtime.patch b/tools/fuzzing/libfuzzer/patches/10-ef-runtime.patch new file mode 100644 index 0000000000..34562e27a5 --- /dev/null +++ b/tools/fuzzing/libfuzzer/patches/10-ef-runtime.patch @@ -0,0 +1,31 @@ +# HG changeset patch +# User Christian Holler <choller@mozilla.com> +# Date 1596126054 -7200 +# Thu Jul 30 18:20:54 2020 +0200 +# Node ID 8a2a26b33d516c43c366b2f24d731d27d9843349 +# Parent 997c4109edd112695097fd8c55cbacd976cab24a +[libFuzzer] Allow external functions to be defined at runtime + +diff --git a/tools/fuzzing/libfuzzer/FuzzerDriver.cpp b/tools/fuzzing/libfuzzer/FuzzerDriver.cpp +--- a/tools/fuzzing/libfuzzer/FuzzerDriver.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerDriver.cpp +@@ -608,17 +608,18 @@ static Vector<SizedFile> ReadCorpora(con + SizedFiles.push_back({File, Size}); + return SizedFiles; + } + + int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { + using namespace fuzzer; + assert(argc && argv && "Argument pointers cannot be nullptr"); + std::string Argv0((*argv)[0]); +- EF = new ExternalFunctions(); ++ if (!EF) ++ EF = new ExternalFunctions(); + if (EF->LLVMFuzzerInitialize) + EF->LLVMFuzzerInitialize(argc, argv); + if (EF->__msan_scoped_disable_interceptor_checks) + EF->__msan_scoped_disable_interceptor_checks(); + const Vector<std::string> Args(*argv, *argv + *argc); + assert(!Args.empty()); + ProgName = new std::string(Args[0]); + if (Argv0 != *ProgName) { diff --git a/tools/fuzzing/libfuzzer/patches/11-callback-rv.patch b/tools/fuzzing/libfuzzer/patches/11-callback-rv.patch new file mode 100644 index 0000000000..650444442c --- /dev/null +++ b/tools/fuzzing/libfuzzer/patches/11-callback-rv.patch @@ -0,0 +1,132 @@ +# HG changeset patch +# User Christian Holler <choller@mozilla.com> +# Date 1596126448 -7200 +# Thu Jul 30 18:27:28 2020 +0200 +# Node ID ea198a0331a6db043cb5978512226977514104db +# Parent 8a2a26b33d516c43c366b2f24d731d27d9843349 +[libFuzzer] Change libFuzzer callback contract to allow positive return values + +diff --git a/tools/fuzzing/libfuzzer/FuzzerInternal.h b/tools/fuzzing/libfuzzer/FuzzerInternal.h +--- a/tools/fuzzing/libfuzzer/FuzzerInternal.h ++++ b/tools/fuzzing/libfuzzer/FuzzerInternal.h +@@ -60,17 +60,17 @@ public: + + static void StaticAlarmCallback(); + static void StaticCrashSignalCallback(); + static void StaticExitCallback(); + static void StaticInterruptCallback(); + static void StaticFileSizeExceedCallback(); + static void StaticGracefulExitCallback(); + +- void ExecuteCallback(const uint8_t *Data, size_t Size); ++ int ExecuteCallback(const uint8_t *Data, size_t Size); + bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, + InputInfo *II = nullptr, bool *FoundUniqFeatures = nullptr); + + // Merge Corpora[1:] into Corpora[0]. + void Merge(const Vector<std::string> &Corpora); + void CrashResistantMergeInternalStep(const std::string &ControlFilePath); + MutationDispatcher &GetMD() { return MD; } + void PrintFinalStats(); +diff --git a/tools/fuzzing/libfuzzer/FuzzerLoop.cpp b/tools/fuzzing/libfuzzer/FuzzerLoop.cpp +--- a/tools/fuzzing/libfuzzer/FuzzerLoop.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerLoop.cpp +@@ -463,17 +463,19 @@ static void RenameFeatureSetFile(const s + DirPlusFile(FeaturesDir, NewFile)); + } + + bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, + InputInfo *II, bool *FoundUniqFeatures) { + if (!Size) + return false; + +- ExecuteCallback(Data, Size); ++ if (ExecuteCallback(Data, Size) > 0) { ++ return false; ++ } + + UniqFeatureSetTmp.clear(); + size_t FoundUniqFeaturesOfII = 0; + size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); + TPC.CollectFeatures([&](size_t Feature) { + if (Corpus.AddFeature(Feature, Size, Options.Shrink)) + UniqFeatureSetTmp.push_back(Feature); + if (Options.Entropic) +@@ -530,48 +532,49 @@ static bool LooseMemeq(const uint8_t *A, + const size_t Limit = 64; + if (Size <= 64) + return !memcmp(A, B, Size); + // Compare first and last Limit/2 bytes. + return !memcmp(A, B, Limit / 2) && + !memcmp(A + Size - Limit / 2, B + Size - Limit / 2, Limit / 2); + } + +-void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { ++int Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { + TPC.RecordInitialStack(); + TotalNumberOfRuns++; + assert(InFuzzingThread()); + // We copy the contents of Unit into a separate heap buffer + // so that we reliably find buffer overflows in it. + uint8_t *DataCopy = new uint8_t[Size]; + memcpy(DataCopy, Data, Size); + if (EF->__msan_unpoison) + EF->__msan_unpoison(DataCopy, Size); + if (EF->__msan_unpoison_param) + EF->__msan_unpoison_param(2); + if (CurrentUnitData && CurrentUnitData != Data) + memcpy(CurrentUnitData, Data, Size); + CurrentUnitSize = Size; ++ int Res = 0; + { + ScopedEnableMsanInterceptorChecks S; + AllocTracer.Start(Options.TraceMalloc); + UnitStartTime = system_clock::now(); + TPC.ResetMaps(); + RunningUserCallback = true; +- int Res = CB(DataCopy, Size); ++ Res = CB(DataCopy, Size); + RunningUserCallback = false; + UnitStopTime = system_clock::now(); +- (void)Res; +- assert(Res == 0); ++ assert(Res >= 0); + HasMoreMallocsThanFrees = AllocTracer.Stop(); + } + if (!LooseMemeq(DataCopy, Data, Size)) + CrashOnOverwrittenData(); + CurrentUnitSize = 0; + delete[] DataCopy; ++ return Res; + } + + std::string Fuzzer::WriteToOutputCorpus(const Unit &U) { + if (Options.OnlyASCII) + assert(IsASCII(U)); + if (Options.OutputCorpus.empty()) + return ""; + std::string Path = DirPlusFile(Options.OutputCorpus, Hash(U)); +diff --git a/tools/fuzzing/libfuzzer/FuzzerMerge.cpp b/tools/fuzzing/libfuzzer/FuzzerMerge.cpp +--- a/tools/fuzzing/libfuzzer/FuzzerMerge.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerMerge.cpp +@@ -223,17 +223,19 @@ void Fuzzer::CrashResistantMergeInternal + U.shrink_to_fit(); + } + + // Write the pre-run marker. + OF << "STARTED " << i << " " << U.size() << "\n"; + OF.flush(); // Flush is important since Command::Execute may crash. + // Run. + TPC.ResetMaps(); +- ExecuteCallback(U.data(), U.size()); ++ if (ExecuteCallback(U.data(), U.size()) > 0) { ++ continue; ++ } + // Collect coverage. We are iterating over the files in this order: + // * First, files in the initial corpus ordered by size, smallest first. + // * Then, all other files, smallest first. + // So it makes no sense to record all features for all files, instead we + // only record features that were not seen before. + Set<size_t> UniqFeatures; + TPC.CollectFeatures([&](size_t Feature) { + if (AllFeatures.insert(Feature).second) diff --git a/tools/fuzzing/libfuzzer/patches/12-custom-mutator-fail.patch b/tools/fuzzing/libfuzzer/patches/12-custom-mutator-fail.patch new file mode 100644 index 0000000000..2457c1f046 --- /dev/null +++ b/tools/fuzzing/libfuzzer/patches/12-custom-mutator-fail.patch @@ -0,0 +1,53 @@ +# HG changeset patch +# User Christian Holler <choller@mozilla.com> +# Date 1596126768 -7200 +# Thu Jul 30 18:32:48 2020 +0200 +# Node ID 64e7d096fa77a62b71a306b2c5383b8f75ac4945 +# Parent ea198a0331a6db043cb5978512226977514104db +[libFuzzer] Allow custom mutators to fail + +diff --git a/tools/fuzzing/libfuzzer/FuzzerLoop.cpp b/tools/fuzzing/libfuzzer/FuzzerLoop.cpp +--- a/tools/fuzzing/libfuzzer/FuzzerLoop.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerLoop.cpp +@@ -690,16 +690,20 @@ void Fuzzer::MutateAndTestOne() { + if (II.HasFocusFunction && !II.DataFlowTraceForFocusFunction.empty() && + Size <= CurrentMaxMutationLen) + NewSize = MD.MutateWithMask(CurrentUnitData, Size, Size, + II.DataFlowTraceForFocusFunction); + + // If MutateWithMask either failed or wasn't called, call default Mutate. + if (!NewSize) + NewSize = MD.Mutate(CurrentUnitData, Size, CurrentMaxMutationLen); ++ ++ if (!NewSize) ++ continue; ++ + assert(NewSize > 0 && "Mutator returned empty unit"); + assert(NewSize <= CurrentMaxMutationLen && "Mutator return oversized unit"); + Size = NewSize; + II.NumExecutedMutations++; + Corpus.IncrementNumExecutedMutations(); + + bool FoundUniqFeatures = false; + bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II, +@@ -850,17 +854,19 @@ void Fuzzer::Loop(Vector<SizedFile> &Cor + void Fuzzer::MinimizeCrashLoop(const Unit &U) { + if (U.size() <= 1) + return; + while (!TimedOut() && TotalNumberOfRuns < Options.MaxNumberOfRuns) { + MD.StartMutationSequence(); + memcpy(CurrentUnitData, U.data(), U.size()); + for (int i = 0; i < Options.MutateDepth; i++) { + size_t NewSize = MD.Mutate(CurrentUnitData, U.size(), MaxMutationLen); +- assert(NewSize > 0 && NewSize <= MaxMutationLen); ++ assert(NewSize <= MaxMutationLen); ++ if (!NewSize) ++ continue; + ExecuteCallback(CurrentUnitData, NewSize); + PrintPulseAndReportSlowInput(CurrentUnitData, NewSize); + TryDetectingAMemoryLeak(CurrentUnitData, NewSize, + /*DuringInitialCorpusExecution*/ false); + } + } + } + diff --git a/tools/fuzzing/libfuzzer/patches/13-unused-write.patch b/tools/fuzzing/libfuzzer/patches/13-unused-write.patch new file mode 100644 index 0000000000..f8ef7a5ac9 --- /dev/null +++ b/tools/fuzzing/libfuzzer/patches/13-unused-write.patch @@ -0,0 +1,88 @@ +# HG changeset patch +# User Christian Holler <choller@mozilla.com> +# Date 1596126946 -7200 +# Thu Jul 30 18:35:46 2020 +0200 +# Node ID 6c779ec81530b6784a714063af66085681ab7318 +# Parent 64e7d096fa77a62b71a306b2c5383b8f75ac4945 +[libFuzzer] Suppress warnings about unused return values + +diff --git a/tools/fuzzing/libfuzzer/FuzzerIO.cpp b/tools/fuzzing/libfuzzer/FuzzerIO.cpp +--- a/tools/fuzzing/libfuzzer/FuzzerIO.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerIO.cpp +@@ -3,16 +3,17 @@ + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. + // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + // + //===----------------------------------------------------------------------===// + // IO functions. + //===----------------------------------------------------------------------===// + ++#include "mozilla/Unused.h" + #include "FuzzerDefs.h" + #include "FuzzerExtFunctions.h" + #include "FuzzerIO.h" + #include "FuzzerUtil.h" + #include <algorithm> + #include <cstdarg> + #include <fstream> + #include <iterator> +@@ -68,17 +69,17 @@ void WriteToFile(const std::string &Data + WriteToFile(reinterpret_cast<const uint8_t *>(Data.c_str()), Data.size(), + Path); + } + + void WriteToFile(const uint8_t *Data, size_t Size, const std::string &Path) { + // Use raw C interface because this function may be called from a sig handler. + FILE *Out = fopen(Path.c_str(), "wb"); + if (!Out) return; +- fwrite(Data, sizeof(Data[0]), Size, Out); ++ mozilla::Unused << fwrite(Data, sizeof(Data[0]), Size, Out); + fclose(Out); + } + + void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V, + long *Epoch, size_t MaxSize, bool ExitOnError) { + long E = Epoch ? *Epoch : 0; + Vector<std::string> Files; + ListFilesInDirRecursive(Path, Epoch, &Files, /*TopDir*/true); +diff --git a/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp b/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp +--- a/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp +@@ -2,16 +2,17 @@ + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. + // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + // + //===----------------------------------------------------------------------===// + // IO functions implementation using Posix API. + //===----------------------------------------------------------------------===// ++#include "mozilla/Unused.h" + #include "FuzzerPlatform.h" + #if LIBFUZZER_POSIX || LIBFUZZER_FUCHSIA + + #include "FuzzerExtFunctions.h" + #include "FuzzerIO.h" + #include <cstdarg> + #include <cstdio> + #include <dirent.h> +@@ -150,17 +151,17 @@ bool IsInterestingCoverageFile(const std + if (FileName.find("/usr/include/") != std::string::npos) + return false; + if (FileName == "<null>") + return false; + return true; + } + + void RawPrint(const char *Str) { +- write(2, Str, strlen(Str)); ++ mozilla::Unused << write(2, Str, strlen(Str)); + } + + void MkDir(const std::string &Path) { + mkdir(Path.c_str(), 0700); + } + + void RmDir(const std::string &Path) { + rmdir(Path.c_str()); diff --git a/tools/fuzzing/libfuzzer/patches/14-explicit-allocator.patch b/tools/fuzzing/libfuzzer/patches/14-explicit-allocator.patch new file mode 100644 index 0000000000..8e02273193 --- /dev/null +++ b/tools/fuzzing/libfuzzer/patches/14-explicit-allocator.patch @@ -0,0 +1,30 @@ +# HG changeset patch +# User Christian Holler <choller@mozilla.com> +# Date 1596126981 -7200 +# Thu Jul 30 18:36:21 2020 +0200 +# Node ID 069dfa3715b1d30905ff0ea1c0f66db88ce146f9 +# Parent 6c779ec81530b6784a714063af66085681ab7318 +[libFuzzer] Make fuzzer_allocator explicit + +diff --git a/tools/fuzzing/libfuzzer/FuzzerDefs.h b/tools/fuzzing/libfuzzer/FuzzerDefs.h +--- a/tools/fuzzing/libfuzzer/FuzzerDefs.h ++++ b/tools/fuzzing/libfuzzer/FuzzerDefs.h +@@ -41,17 +41,17 @@ extern ExternalFunctions *EF; + // We are using a custom allocator to give a different symbol name to STL + // containers in order to avoid ODR violations. + template<typename T> + class fuzzer_allocator: public std::allocator<T> { + public: + fuzzer_allocator() = default; + + template<class U> +- fuzzer_allocator(const fuzzer_allocator<U>&) {} ++ explicit fuzzer_allocator(const fuzzer_allocator<U>&) {} + + template<class Other> + struct rebind { typedef fuzzer_allocator<Other> other; }; + }; + + template<typename T> + using Vector = std::vector<T, fuzzer_allocator<T>>; + diff --git a/tools/fuzzing/libfuzzer/patches/15-return-to-exit.patch b/tools/fuzzing/libfuzzer/patches/15-return-to-exit.patch new file mode 100644 index 0000000000..4adb9f59a9 --- /dev/null +++ b/tools/fuzzing/libfuzzer/patches/15-return-to-exit.patch @@ -0,0 +1,968 @@ +commit f80733b3b1b5e05e7dfd7a071f60050fe20108c3 +Author: Jesse Schwartzentruber <truber@mozilla.com> +Date: Mon Mar 1 15:47:38 2021 -0500 + + [libfuzzer] In most cases, return instead of exit(). + +diff --git a/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.cpp b/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.cpp +index 0e9cdf7e66b1..06ea287a3cfe 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.cpp +@@ -102,9 +102,11 @@ Vector<double> BlockCoverage::FunctionWeights(size_t NumFunctions) const { + return Res; + } + +-void DataFlowTrace::ReadCoverage(const std::string &DirPath) { ++int DataFlowTrace::ReadCoverage(const std::string &DirPath) { + Vector<SizedFile> Files; +- GetSizedFilesFromDir(DirPath, &Files); ++ int Res = GetSizedFilesFromDir(DirPath, &Files); ++ if (Res != 0) ++ return Res; + for (auto &SF : Files) { + auto Name = Basename(SF.File); + if (Name == kFunctionsTxt) continue; +@@ -112,6 +114,7 @@ void DataFlowTrace::ReadCoverage(const std::string &DirPath) { + std::ifstream IF(SF.File); + Coverage.AppendCoverage(IF); + } ++ return 0; + } + + static void DFTStringAppendToVector(Vector<uint8_t> *DFT, +@@ -157,12 +160,14 @@ static bool ParseDFTLine(const std::string &Line, size_t *FunctionNum, + return true; + } + +-bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction, ++int DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction, + Vector<SizedFile> &CorporaFiles, Random &Rand) { +- if (DirPath.empty()) return false; ++ if (DirPath.empty()) return 0; + Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str()); + Vector<SizedFile> Files; +- GetSizedFilesFromDir(DirPath, &Files); ++ int Res = GetSizedFilesFromDir(DirPath, &Files); ++ if (Res != 0) ++ return Res; + std::string L; + size_t FocusFuncIdx = SIZE_MAX; + Vector<std::string> FunctionNames; +@@ -181,14 +186,16 @@ bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction, + FocusFuncIdx = NumFunctions - 1; + } + if (!NumFunctions) +- return false; ++ return 0; + + if (*FocusFunction == "auto") { + // AUTOFOCUS works like this: + // * reads the coverage data from the DFT files. + // * assigns weights to functions based on coverage. + // * chooses a random function according to the weights. +- ReadCoverage(DirPath); ++ Res = ReadCoverage(DirPath); ++ if (Res != 0) ++ return Res; + auto Weights = Coverage.FunctionWeights(NumFunctions); + Vector<double> Intervals(NumFunctions + 1); + std::iota(Intervals.begin(), Intervals.end(), 0); +@@ -209,7 +216,7 @@ bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction, + } + + if (!NumFunctions || FocusFuncIdx == SIZE_MAX || Files.size() <= 1) +- return false; ++ return 0; + + // Read traces. + size_t NumTraceFiles = 0; +@@ -228,8 +235,10 @@ bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction, + FunctionNum == FocusFuncIdx) { + NumTracesWithFocusFunction++; + +- if (FunctionNum >= NumFunctions) +- return ParseError("N is greater than the number of functions", L); ++ if (FunctionNum >= NumFunctions) { ++ ParseError("N is greater than the number of functions", L); ++ return 0; ++ } + Traces[Name] = DFTStringToVector(DFTString); + // Print just a few small traces. + if (NumTracesWithFocusFunction <= 3 && DFTString.size() <= 16) +@@ -241,7 +250,7 @@ bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction, + Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, " + "%zd traces with focus function\n", + NumTraceFiles, NumFunctions, NumTracesWithFocusFunction); +- return NumTraceFiles > 0; ++ return 0; + } + + int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath, +diff --git a/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.h b/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.h +index d6e3de30a4ef..767bad24f1d0 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.h ++++ b/tools/fuzzing/libfuzzer/FuzzerDataFlowTrace.h +@@ -113,8 +113,8 @@ class BlockCoverage { + + class DataFlowTrace { + public: +- void ReadCoverage(const std::string &DirPath); +- bool Init(const std::string &DirPath, std::string *FocusFunction, ++ int ReadCoverage(const std::string &DirPath); ++ int Init(const std::string &DirPath, std::string *FocusFunction, + Vector<SizedFile> &CorporaFiles, Random &Rand); + void Clear() { Traces.clear(); } + const Vector<uint8_t> *Get(const std::string &InputSha1) const { +diff --git a/tools/fuzzing/libfuzzer/FuzzerDriver.cpp b/tools/fuzzing/libfuzzer/FuzzerDriver.cpp +index cd720200848b..bedad16efa7b 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerDriver.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerDriver.cpp +@@ -326,7 +326,7 @@ int CleanseCrashInput(const Vector<std::string> &Args, + if (Inputs->size() != 1 || !Flags.exact_artifact_path) { + Printf("ERROR: -cleanse_crash should be given one input file and" + " -exact_artifact_path\n"); +- exit(1); ++ return 1; + } + std::string InputFilePath = Inputs->at(0); + std::string OutputFilePath = Flags.exact_artifact_path; +@@ -380,7 +380,7 @@ int MinimizeCrashInput(const Vector<std::string> &Args, + const FuzzingOptions &Options) { + if (Inputs->size() != 1) { + Printf("ERROR: -minimize_crash should be given one input file\n"); +- exit(1); ++ return 1; + } + std::string InputFilePath = Inputs->at(0); + Command BaseCmd(Args); +@@ -411,7 +411,7 @@ int MinimizeCrashInput(const Vector<std::string> &Args, + bool Success = ExecuteCommand(Cmd, &CmdOutput); + if (Success) { + Printf("ERROR: the input %s did not crash\n", CurrentFilePath.c_str()); +- exit(1); ++ return 1; + } + Printf("CRASH_MIN: '%s' (%zd bytes) caused a crash. Will try to minimize " + "it further\n", +@@ -466,42 +466,51 @@ int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) { + Printf("INFO: Starting MinimizeCrashInputInternalStep: %zd\n", U.size()); + if (U.size() < 2) { + Printf("INFO: The input is small enough, exiting\n"); +- exit(0); ++ return 0; + } + F->SetMaxInputLen(U.size()); + F->SetMaxMutationLen(U.size() - 1); + F->MinimizeCrashLoop(U); + Printf("INFO: Done MinimizeCrashInputInternalStep, no crashes found\n"); +- exit(0); + return 0; + } + +-void Merge(Fuzzer *F, FuzzingOptions &Options, const Vector<std::string> &Args, ++int Merge(Fuzzer *F, FuzzingOptions &Options, const Vector<std::string> &Args, + const Vector<std::string> &Corpora, const char *CFPathOrNull) { + if (Corpora.size() < 2) { + Printf("INFO: Merge requires two or more corpus dirs\n"); +- exit(0); ++ return 0; + } + + Vector<SizedFile> OldCorpus, NewCorpus; +- GetSizedFilesFromDir(Corpora[0], &OldCorpus); +- for (size_t i = 1; i < Corpora.size(); i++) +- GetSizedFilesFromDir(Corpora[i], &NewCorpus); ++ int Res = GetSizedFilesFromDir(Corpora[0], &OldCorpus); ++ if (Res != 0) ++ return Res; ++ for (size_t i = 1; i < Corpora.size(); i++) { ++ Res = GetSizedFilesFromDir(Corpora[i], &NewCorpus); ++ if (Res != 0) ++ return Res; ++ } + std::sort(OldCorpus.begin(), OldCorpus.end()); + std::sort(NewCorpus.begin(), NewCorpus.end()); + + std::string CFPath = CFPathOrNull ? CFPathOrNull : TempPath("Merge", ".txt"); + Vector<std::string> NewFiles; + Set<uint32_t> NewFeatures, NewCov; +- CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, {}, &NewFeatures, ++ Res = CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, {}, &NewFeatures, + {}, &NewCov, CFPath, true); ++ if (Res != 0) ++ return Res; ++ ++ if (F->isGracefulExitRequested()) ++ return 0; + for (auto &Path : NewFiles) + F->WriteToOutputCorpus(FileToVector(Path, Options.MaxLen)); + // We are done, delete the control file if it was a temporary one. + if (!Flags.merge_control_file) + RemoveFile(CFPath); + +- exit(0); ++ return 0; + } + + int AnalyzeDictionary(Fuzzer *F, const Vector<Unit>& Dict, +@@ -570,10 +579,9 @@ int AnalyzeDictionary(Fuzzer *F, const Vector<Unit>& Dict, + return 0; + } + +-Vector<std::string> ParseSeedInuts(const char *seed_inputs) { ++int ParseSeedInuts(const char *seed_inputs, Vector<std::string> &Files) { + // Parse -seed_inputs=file1,file2,... or -seed_inputs=@seed_inputs_file +- Vector<std::string> Files; +- if (!seed_inputs) return Files; ++ if (!seed_inputs) return 0; + std::string SeedInputs; + if (Flags.seed_inputs[0] == '@') + SeedInputs = FileToString(Flags.seed_inputs + 1); // File contains list. +@@ -581,7 +589,7 @@ Vector<std::string> ParseSeedInuts(const char *seed_inputs) { + SeedInputs = Flags.seed_inputs; // seed_inputs contains the list. + if (SeedInputs.empty()) { + Printf("seed_inputs is empty or @file does not exist.\n"); +- exit(1); ++ return 1; + } + // Parse SeedInputs. + size_t comma_pos = 0; +@@ -590,7 +598,7 @@ Vector<std::string> ParseSeedInuts(const char *seed_inputs) { + SeedInputs = SeedInputs.substr(0, comma_pos); + } + Files.push_back(SeedInputs); +- return Files; ++ return 0; + } + + static Vector<SizedFile> ReadCorpora(const Vector<std::string> &CorpusDirs, +@@ -624,7 +632,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { + ProgName = new std::string(Args[0]); + if (Argv0 != *ProgName) { + Printf("ERROR: argv[0] has been modified in LLVMFuzzerInitialize\n"); +- exit(1); ++ return 1; + } + ParseFlags(Args, EF); + if (Flags.help) { +@@ -723,7 +731,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { + if (!Options.FocusFunction.empty()) { + Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot " + "be used together.\n"); +- exit(1); ++ return 1; + } + Printf("INFO: Running with entropic power schedule (0x%X, %d).\n", + Options.EntropicFeatureFrequencyThreshold, +@@ -809,22 +817,21 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { + "*** executed the target code on a fixed set of inputs.\n" + "***\n"); + F->PrintFinalStats(); +- exit(0); ++ return 0; + } + + if (Flags.fork) +- FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork); ++ return FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork); + + if (Flags.merge) +- Merge(F, Options, Args, *Inputs, Flags.merge_control_file); ++ return Merge(F, Options, Args, *Inputs, Flags.merge_control_file); + + if (Flags.merge_inner) { + const size_t kDefaultMaxMergeLen = 1 << 20; + if (Options.MaxLen == 0) + F->SetMaxInputLen(kDefaultMaxMergeLen); + assert(Flags.merge_control_file); +- F->CrashResistantMergeInternalStep(Flags.merge_control_file); +- exit(0); ++ return F->CrashResistantMergeInternalStep(Flags.merge_control_file); + } + + if (Flags.analyze_dict) { +@@ -842,21 +849,31 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { + } + if (AnalyzeDictionary(F, Dictionary, InitialCorpus)) { + Printf("Dictionary analysis failed\n"); +- exit(1); ++ return 1; + } + Printf("Dictionary analysis succeeded\n"); +- exit(0); ++ return 0; + } + +- auto CorporaFiles = ReadCorpora(*Inputs, ParseSeedInuts(Flags.seed_inputs)); +- F->Loop(CorporaFiles); ++ { ++ Vector<std::string> Files; ++ int Res = ParseSeedInuts(Flags.seed_inputs, Files); ++ if (Res != 0) ++ return Res; ++ auto CorporaFiles = ReadCorpora(*Inputs, Files); ++ Res = F->Loop(CorporaFiles); ++ if (Res != 0) ++ return Res; ++ if (F->isGracefulExitRequested()) ++ return 0; ++ } + + if (Flags.verbosity) + Printf("Done %zd runs in %zd second(s)\n", F->getTotalNumberOfRuns(), + F->secondsSinceProcessStartUp()); + F->PrintFinalStats(); + +- exit(0); // Don't let F destroy itself. ++ return 0; // Don't let F destroy itself. + } + + extern "C" ATTRIBUTE_INTERFACE int +diff --git a/tools/fuzzing/libfuzzer/FuzzerFork.cpp b/tools/fuzzing/libfuzzer/FuzzerFork.cpp +index d9e6b79443e0..ee2a99a250c1 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerFork.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerFork.cpp +@@ -177,14 +177,16 @@ struct GlobalEnv { + return Job; + } + +- void RunOneMergeJob(FuzzJob *Job) { ++ int RunOneMergeJob(FuzzJob *Job) { + auto Stats = ParseFinalStatsFromLog(Job->LogPath); + NumRuns += Stats.number_of_executed_units; + + Vector<SizedFile> TempFiles, MergeCandidates; + // Read all newly created inputs and their feature sets. + // Choose only those inputs that have new features. +- GetSizedFilesFromDir(Job->CorpusDir, &TempFiles); ++ int Res = GetSizedFilesFromDir(Job->CorpusDir, &TempFiles); ++ if (Res != 0) ++ return Res; + std::sort(TempFiles.begin(), TempFiles.end()); + for (auto &F : TempFiles) { + auto FeatureFile = F.File; +@@ -207,12 +209,14 @@ struct GlobalEnv { + Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes, + secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds); + +- if (MergeCandidates.empty()) return; ++ if (MergeCandidates.empty()) return 0; + + Vector<std::string> FilesToAdd; + Set<uint32_t> NewFeatures, NewCov; + CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features, + &NewFeatures, Cov, &NewCov, Job->CFPath, false); ++ if (Fuzzer::isGracefulExitRequested()) ++ return 0; + for (auto &Path : FilesToAdd) { + auto U = FileToVector(Path); + auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); +@@ -226,7 +230,7 @@ struct GlobalEnv { + if (TPC.PcIsFuncEntry(TE)) + PrintPC(" NEW_FUNC: %p %F %L\n", "", + TPC.GetNextInstructionPc(TE->PC)); +- ++ return 0; + } + + +@@ -280,7 +284,7 @@ void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) { + } + + // This is just a skeleton of an experimental -fork=1 feature. +-void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, ++int FuzzWithFork(Random &Rand, const FuzzingOptions &Options, + const Vector<std::string> &Args, + const Vector<std::string> &CorpusDirs, int NumJobs) { + Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs); +@@ -294,8 +298,12 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, + Env.DataFlowBinary = Options.CollectDataFlow; + + Vector<SizedFile> SeedFiles; +- for (auto &Dir : CorpusDirs) +- GetSizedFilesFromDir(Dir, &SeedFiles); ++ int Res; ++ for (auto &Dir : CorpusDirs) { ++ Res = GetSizedFilesFromDir(Dir, &SeedFiles); ++ if (Res != 0) ++ return Res; ++ } + std::sort(SeedFiles.begin(), SeedFiles.end()); + Env.TempDir = TempPath("FuzzWithFork", ".dir"); + Env.DFTDir = DirPlusFile(Env.TempDir, "DFT"); +@@ -310,9 +318,14 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, + Env.MainCorpusDir = CorpusDirs[0]; + + auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); +- CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, ++ Res = CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, + {}, &Env.Cov, + CFPath, false); ++ if (Res != 0) ++ return Res; ++ if (Fuzzer::isGracefulExitRequested()) ++ return 0; ++ + RemoveFile(CFPath); + Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, + Env.Files.size(), Env.TempDir.c_str()); +@@ -345,9 +358,14 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, + StopJobs(); + break; + } +- Fuzzer::MaybeExitGracefully(); ++ if (Fuzzer::MaybeExitGracefully()) ++ return 0; + +- Env.RunOneMergeJob(Job.get()); ++ Res = Env.RunOneMergeJob(Job.get()); ++ if (Res != 0) ++ return Res; ++ if (Fuzzer::isGracefulExitRequested()) ++ return 0; + + // Continue if our crash is one of the ignorred ones. + if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode) +@@ -403,7 +421,7 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, + // Use the exit code from the last child process. + Printf("INFO: exiting: %d time: %zds\n", ExitCode, + Env.secondsSinceProcessStartUp()); +- exit(ExitCode); ++ return ExitCode; + } + + } // namespace fuzzer +diff --git a/tools/fuzzing/libfuzzer/FuzzerFork.h b/tools/fuzzing/libfuzzer/FuzzerFork.h +index b29a43e13fbc..1352171ad49d 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerFork.h ++++ b/tools/fuzzing/libfuzzer/FuzzerFork.h +@@ -16,7 +16,7 @@ + #include <string> + + namespace fuzzer { +-void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, ++int FuzzWithFork(Random &Rand, const FuzzingOptions &Options, + const Vector<std::string> &Args, + const Vector<std::string> &CorpusDirs, int NumJobs); + } // namespace fuzzer +diff --git a/tools/fuzzing/libfuzzer/FuzzerIO.cpp b/tools/fuzzing/libfuzzer/FuzzerIO.cpp +index 0053ef39f2b9..6be2be67c691 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerIO.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerIO.cpp +@@ -82,7 +82,9 @@ void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V, + long *Epoch, size_t MaxSize, bool ExitOnError) { + long E = Epoch ? *Epoch : 0; + Vector<std::string> Files; +- ListFilesInDirRecursive(Path, Epoch, &Files, /*TopDir*/true); ++ int Res = ListFilesInDirRecursive(Path, Epoch, &Files, /*TopDir*/true); ++ if (ExitOnError && Res != 0) ++ exit(Res); + size_t NumLoaded = 0; + for (size_t i = 0; i < Files.size(); i++) { + auto &X = Files[i]; +@@ -97,12 +99,15 @@ void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V, + } + + +-void GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V) { ++int GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V) { + Vector<std::string> Files; +- ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true); ++ int Res = ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true); ++ if (Res != 0) ++ return Res; + for (auto &File : Files) + if (size_t Size = FileSize(File)) + V->push_back({File, Size}); ++ return 0; + } + + std::string DirPlusFile(const std::string &DirPath, +diff --git a/tools/fuzzing/libfuzzer/FuzzerIO.h b/tools/fuzzing/libfuzzer/FuzzerIO.h +index 6e4368b971fa..6c90ba637322 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerIO.h ++++ b/tools/fuzzing/libfuzzer/FuzzerIO.h +@@ -60,7 +60,7 @@ void RawPrint(const char *Str); + bool IsFile(const std::string &Path); + size_t FileSize(const std::string &Path); + +-void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, ++int ListFilesInDirRecursive(const std::string &Dir, long *Epoch, + Vector<std::string> *V, bool TopDir); + + void RmDirRecursive(const std::string &Dir); +@@ -79,7 +79,7 @@ struct SizedFile { + bool operator<(const SizedFile &B) const { return Size < B.Size; } + }; + +-void GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V); ++int GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V); + + char GetSeparator(); + // Similar to the basename utility: returns the file name w/o the dir prefix. +diff --git a/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp b/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp +index 4b453d286c80..1a50295c010f 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerIOPosix.cpp +@@ -53,16 +53,16 @@ std::string Basename(const std::string &Path) { + return Path.substr(Pos + 1); + } + +-void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, ++int ListFilesInDirRecursive(const std::string &Dir, long *Epoch, + Vector<std::string> *V, bool TopDir) { + auto E = GetEpoch(Dir); + if (Epoch) +- if (E && *Epoch >= E) return; ++ if (E && *Epoch >= E) return 0; + + DIR *D = opendir(Dir.c_str()); + if (!D) { + Printf("%s: %s; exiting\n", strerror(errno), Dir.c_str()); +- exit(1); ++ return 1; + } + while (auto E = readdir(D)) { + std::string Path = DirPlusFile(Dir, E->d_name); +@@ -71,12 +71,16 @@ void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, + V->push_back(Path); + else if ((E->d_type == DT_DIR || + (E->d_type == DT_UNKNOWN && IsDirectory(Path))) && +- *E->d_name != '.') +- ListFilesInDirRecursive(Path, Epoch, V, false); ++ *E->d_name != '.') { ++ int Res = ListFilesInDirRecursive(Path, Epoch, V, false); ++ if (Res != 0) ++ return Res; ++ } + } + closedir(D); + if (Epoch && TopDir) + *Epoch = E; ++ return 0; + } + + +diff --git a/tools/fuzzing/libfuzzer/FuzzerIOWindows.cpp b/tools/fuzzing/libfuzzer/FuzzerIOWindows.cpp +index 651283a551cf..0e977bd02557 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerIOWindows.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerIOWindows.cpp +@@ -98,11 +98,12 @@ size_t FileSize(const std::string &Path) { + return size.QuadPart; + } + +-void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, ++int ListFilesInDirRecursive(const std::string &Dir, long *Epoch, + Vector<std::string> *V, bool TopDir) { ++ int Res; + auto E = GetEpoch(Dir); + if (Epoch) +- if (E && *Epoch >= E) return; ++ if (E && *Epoch >= E) return 0; + + std::string Path(Dir); + assert(!Path.empty()); +@@ -116,9 +117,9 @@ void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, + if (FindHandle == INVALID_HANDLE_VALUE) + { + if (GetLastError() == ERROR_FILE_NOT_FOUND) +- return; ++ return 0; + Printf("No such file or directory: %s; exiting\n", Dir.c_str()); +- exit(1); ++ return 1; + } + + do { +@@ -131,7 +132,9 @@ void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, + FindInfo.cFileName[1] == '.')) + continue; + +- ListFilesInDirRecursive(FileName, Epoch, V, false); ++ int Res = ListFilesInDirRecursive(FileName, Epoch, V, false); ++ if (Res != 0) ++ return Res; + } + else if (IsFile(FileName, FindInfo.dwFileAttributes)) + V->push_back(FileName); +@@ -145,6 +148,7 @@ void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, + + if (Epoch && TopDir) + *Epoch = E; ++ return 0; + } + + +diff --git a/tools/fuzzing/libfuzzer/FuzzerInternal.h b/tools/fuzzing/libfuzzer/FuzzerInternal.h +index 1f7d671ed848..cc2650b58ef1 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerInternal.h ++++ b/tools/fuzzing/libfuzzer/FuzzerInternal.h +@@ -35,8 +35,8 @@ public: + Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, + FuzzingOptions Options); + ~Fuzzer(); +- void Loop(Vector<SizedFile> &CorporaFiles); +- void ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles); ++ int Loop(Vector<SizedFile> &CorporaFiles); ++ int ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles); + void MinimizeCrashLoop(const Unit &U); + void RereadOutputCorpus(size_t MaxSize); + +@@ -65,13 +65,16 @@ public: + static void StaticFileSizeExceedCallback(); + static void StaticGracefulExitCallback(); + ++ static void GracefullyExit(); ++ static bool isGracefulExitRequested(); ++ + int ExecuteCallback(const uint8_t *Data, size_t Size); + bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, + InputInfo *II = nullptr, bool *FoundUniqFeatures = nullptr); + + // Merge Corpora[1:] into Corpora[0]. + void Merge(const Vector<std::string> &Corpora); +- void CrashResistantMergeInternalStep(const std::string &ControlFilePath); ++ int CrashResistantMergeInternalStep(const std::string &ControlFilePath); + MutationDispatcher &GetMD() { return MD; } + void PrintFinalStats(); + void SetMaxInputLen(size_t MaxInputLen); +@@ -84,7 +87,7 @@ public: + bool DuringInitialCorpusExecution); + + void HandleMalloc(size_t Size); +- static void MaybeExitGracefully(); ++ static bool MaybeExitGracefully(); + std::string WriteToOutputCorpus(const Unit &U); + + private: +@@ -93,7 +96,7 @@ private: + void ExitCallback(); + void CrashOnOverwrittenData(); + void InterruptCallback(); +- void MutateAndTestOne(); ++ bool MutateAndTestOne(); + void PurgeAllocator(); + void ReportNewCoverage(InputInfo *II, const Unit &U); + void PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size); +diff --git a/tools/fuzzing/libfuzzer/FuzzerLoop.cpp b/tools/fuzzing/libfuzzer/FuzzerLoop.cpp +index 4c4e8c271b1f..e7dfc187dbfe 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerLoop.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerLoop.cpp +@@ -254,12 +254,20 @@ void Fuzzer::ExitCallback() { + _Exit(Options.ErrorExitCode); + } + +-void Fuzzer::MaybeExitGracefully() { +- if (!F->GracefulExitRequested) return; ++bool Fuzzer::MaybeExitGracefully() { ++ if (!F->GracefulExitRequested) return false; + Printf("==%lu== INFO: libFuzzer: exiting as requested\n", GetPid()); + RmDirRecursive(TempPath("FuzzWithFork", ".dir")); + F->PrintFinalStats(); +- _Exit(0); ++ return true; ++} ++ ++void Fuzzer::GracefullyExit() { ++ F->GracefulExitRequested = true; ++} ++ ++bool Fuzzer::isGracefulExitRequested() { ++ return F->GracefulExitRequested; + } + + void Fuzzer::InterruptCallback() { +@@ -663,7 +671,7 @@ void Fuzzer::TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, + } + } + +-void Fuzzer::MutateAndTestOne() { ++bool Fuzzer::MutateAndTestOne() { + MD.StartMutationSequence(); + + auto &II = Corpus.ChooseUnitToMutate(MD.GetRand()); +@@ -685,7 +693,7 @@ void Fuzzer::MutateAndTestOne() { + for (int i = 0; i < Options.MutateDepth; i++) { + if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) + break; +- MaybeExitGracefully(); ++ if (MaybeExitGracefully()) return true; + size_t NewSize = 0; + if (II.HasFocusFunction && !II.DataFlowTraceForFocusFunction.empty() && + Size <= CurrentMaxMutationLen) +@@ -719,6 +727,7 @@ void Fuzzer::MutateAndTestOne() { + } + + II.NeedsEnergyUpdate = true; ++ return false; + } + + void Fuzzer::PurgeAllocator() { +@@ -736,7 +745,7 @@ void Fuzzer::PurgeAllocator() { + LastAllocatorPurgeAttemptTime = system_clock::now(); + } + +-void Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) { ++int Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) { + const size_t kMaxSaneLen = 1 << 20; + const size_t kMinDefaultLen = 4096; + size_t MaxSize = 0; +@@ -795,16 +804,23 @@ void Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) { + if (Corpus.empty() && Options.MaxNumberOfRuns) { + Printf("ERROR: no interesting inputs were found. " + "Is the code instrumented for coverage? Exiting.\n"); +- exit(1); ++ return 1; + } ++ return 0; + } + +-void Fuzzer::Loop(Vector<SizedFile> &CorporaFiles) { ++int Fuzzer::Loop(Vector<SizedFile> &CorporaFiles) { + auto FocusFunctionOrAuto = Options.FocusFunction; +- DFT.Init(Options.DataFlowTrace, &FocusFunctionOrAuto, CorporaFiles, ++ int Res = DFT.Init(Options.DataFlowTrace, &FocusFunctionOrAuto, CorporaFiles, + MD.GetRand()); +- TPC.SetFocusFunction(FocusFunctionOrAuto); +- ReadAndExecuteSeedCorpora(CorporaFiles); ++ if (Res != 0) ++ return Res; ++ Res = TPC.SetFocusFunction(FocusFunctionOrAuto); ++ if (Res != 0) ++ return Res; ++ Res = ReadAndExecuteSeedCorpora(CorporaFiles); ++ if (Res != 0) ++ return Res; + DFT.Clear(); // No need for DFT any more. + TPC.SetPrintNewPCs(Options.PrintNewCovPcs); + TPC.SetPrintNewFuncs(Options.PrintNewCovFuncs); +@@ -842,13 +858,15 @@ void Fuzzer::Loop(Vector<SizedFile> &CorporaFiles) { + } + + // Perform several mutations and runs. +- MutateAndTestOne(); ++ if (MutateAndTestOne()) ++ return 0; + + PurgeAllocator(); + } + + PrintStats("DONE ", "\n"); + MD.PrintRecommendedDictionary(); ++ return 0; + } + + void Fuzzer::MinimizeCrashLoop(const Unit &U) { +diff --git a/tools/fuzzing/libfuzzer/FuzzerMerge.cpp b/tools/fuzzing/libfuzzer/FuzzerMerge.cpp +index 919eea848580..0a185c7325bb 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerMerge.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerMerge.cpp +@@ -28,11 +28,12 @@ bool Merger::Parse(const std::string &Str, bool ParseCoverage) { + return Parse(SS, ParseCoverage); + } + +-void Merger::ParseOrExit(std::istream &IS, bool ParseCoverage) { ++int Merger::ParseOrExit(std::istream &IS, bool ParseCoverage) { + if (!Parse(IS, ParseCoverage)) { + Printf("MERGE: failed to parse the control file (unexpected error)\n"); +- exit(1); ++ return 1; + } ++ return 0; + } + + // The control file example: +@@ -194,11 +195,13 @@ Set<uint32_t> Merger::AllFeatures() const { + } + + // Inner process. May crash if the target crashes. +-void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { ++int Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { + Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str()); + Merger M; + std::ifstream IF(CFPath); +- M.ParseOrExit(IF, false); ++ int Res = M.ParseOrExit(IF, false); ++ if (Res != 0) ++ return Res; + IF.close(); + if (!M.LastFailure.empty()) + Printf("MERGE-INNER: '%s' caused a failure at the previous merge step\n", +@@ -216,7 +219,8 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { + }; + Set<const TracePC::PCTableEntry *> AllPCs; + for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) { +- Fuzzer::MaybeExitGracefully(); ++ if (Fuzzer::MaybeExitGracefully()) ++ return 0; + auto U = FileToVector(M.Files[i].Name); + if (U.size() > MaxInputLen) { + U.resize(MaxInputLen); +@@ -261,12 +265,14 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { + OF.flush(); + } + PrintStatsWrapper("DONE "); ++ return 0; + } + +-static size_t WriteNewControlFile(const std::string &CFPath, ++static int WriteNewControlFile(const std::string &CFPath, + const Vector<SizedFile> &OldCorpus, + const Vector<SizedFile> &NewCorpus, +- const Vector<MergeFileInfo> &KnownFiles) { ++ const Vector<MergeFileInfo> &KnownFiles, ++ size_t &NumFiles) { + std::unordered_set<std::string> FilesToSkip; + for (auto &SF: KnownFiles) + FilesToSkip.insert(SF.Name); +@@ -292,14 +298,15 @@ static size_t WriteNewControlFile(const std::string &CFPath, + if (!ControlFile) { + Printf("MERGE-OUTER: failed to write to the control file: %s\n", + CFPath.c_str()); +- exit(1); ++ return 1; + } + +- return FilesToUse.size(); ++ NumFiles = FilesToUse.size(); ++ return 0; + } + + // Outer process. Does not call the target code and thus should not fail. +-void CrashResistantMerge(const Vector<std::string> &Args, ++int CrashResistantMerge(const Vector<std::string> &Args, + const Vector<SizedFile> &OldCorpus, + const Vector<SizedFile> &NewCorpus, + Vector<std::string> *NewFiles, +@@ -309,8 +316,9 @@ void CrashResistantMerge(const Vector<std::string> &Args, + Set<uint32_t> *NewCov, + const std::string &CFPath, + bool V /*Verbose*/) { +- if (NewCorpus.empty() && OldCorpus.empty()) return; // Nothing to merge. ++ if (NewCorpus.empty() && OldCorpus.empty()) return 0; // Nothing to merge. + size_t NumAttempts = 0; ++ int Res; + Vector<MergeFileInfo> KnownFiles; + if (FileSize(CFPath)) { + VPrintf(V, "MERGE-OUTER: non-empty control file provided: '%s'\n", +@@ -331,7 +339,8 @@ void CrashResistantMerge(const Vector<std::string> &Args, + VPrintf( + V, + "MERGE-OUTER: nothing to do, merge has been completed before\n"); +- exit(0); ++ Fuzzer::GracefullyExit(); ++ return 0; + } + + // Number of input files likely changed, start merge from scratch, but +@@ -356,7 +365,9 @@ void CrashResistantMerge(const Vector<std::string> &Args, + "%zd files, %zd in the initial corpus, %zd processed earlier\n", + OldCorpus.size() + NewCorpus.size(), OldCorpus.size(), + KnownFiles.size()); +- NumAttempts = WriteNewControlFile(CFPath, OldCorpus, NewCorpus, KnownFiles); ++ Res = WriteNewControlFile(CFPath, OldCorpus, NewCorpus, KnownFiles, NumAttempts); ++ if (Res != 0) ++ return Res; + } + + // Execute the inner process until it passes. +@@ -366,7 +377,8 @@ void CrashResistantMerge(const Vector<std::string> &Args, + BaseCmd.removeFlag("fork"); + BaseCmd.removeFlag("collect_data_flow"); + for (size_t Attempt = 1; Attempt <= NumAttempts; Attempt++) { +- Fuzzer::MaybeExitGracefully(); ++ if (Fuzzer::MaybeExitGracefully()) ++ return 0; + VPrintf(V, "MERGE-OUTER: attempt %zd\n", Attempt); + Command Cmd(BaseCmd); + Cmd.addFlag("merge_control_file", CFPath); +@@ -388,7 +400,9 @@ void CrashResistantMerge(const Vector<std::string> &Args, + VPrintf(V, "MERGE-OUTER: the control file has %zd bytes\n", + (size_t)IF.tellg()); + IF.seekg(0, IF.beg); +- M.ParseOrExit(IF, true); ++ Res = M.ParseOrExit(IF, true); ++ if (Res != 0) ++ return Res; + IF.close(); + VPrintf(V, + "MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n", +@@ -399,6 +413,7 @@ void CrashResistantMerge(const Vector<std::string> &Args, + VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; " + "%zd new coverage edges\n", + NewFiles->size(), NewFeatures->size(), NewCov->size()); ++ return 0; + } + + } // namespace fuzzer +diff --git a/tools/fuzzing/libfuzzer/FuzzerMerge.h b/tools/fuzzing/libfuzzer/FuzzerMerge.h +index e0c6bc539bdb..6dc1c4c45abf 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerMerge.h ++++ b/tools/fuzzing/libfuzzer/FuzzerMerge.h +@@ -63,7 +63,7 @@ struct Merger { + + bool Parse(std::istream &IS, bool ParseCoverage); + bool Parse(const std::string &Str, bool ParseCoverage); +- void ParseOrExit(std::istream &IS, bool ParseCoverage); ++ int ParseOrExit(std::istream &IS, bool ParseCoverage); + size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures, + const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov, + Vector<std::string> *NewFiles); +@@ -71,7 +71,7 @@ struct Merger { + Set<uint32_t> AllFeatures() const; + }; + +-void CrashResistantMerge(const Vector<std::string> &Args, ++int CrashResistantMerge(const Vector<std::string> &Args, + const Vector<SizedFile> &OldCorpus, + const Vector<SizedFile> &NewCorpus, + Vector<std::string> *NewFiles, +diff --git a/tools/fuzzing/libfuzzer/FuzzerTracePC.cpp b/tools/fuzzing/libfuzzer/FuzzerTracePC.cpp +index b2ca7693e540..fbceda39bc22 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerTracePC.cpp ++++ b/tools/fuzzing/libfuzzer/FuzzerTracePC.cpp +@@ -238,13 +238,13 @@ void TracePC::IterateCoveredFunctions(CallBack CB) { + } + } + +-void TracePC::SetFocusFunction(const std::string &FuncName) { ++int TracePC::SetFocusFunction(const std::string &FuncName) { + // This function should be called once. + assert(!FocusFunctionCounterPtr); + // "auto" is not a valid function name. If this function is called with "auto" + // that means the auto focus functionality failed. + if (FuncName.empty() || FuncName == "auto") +- return; ++ return 0; + for (size_t M = 0; M < NumModules; M++) { + auto &PCTE = ModulePCTable[M]; + size_t N = PCTE.Stop - PCTE.Start; +@@ -256,13 +256,13 @@ void TracePC::SetFocusFunction(const std::string &FuncName) { + if (FuncName != Name) continue; + Printf("INFO: Focus function is set to '%s'\n", Name.c_str()); + FocusFunctionCounterPtr = Modules[M].Start() + I; +- return; ++ return 0; + } + } + + Printf("ERROR: Failed to set focus function. Make sure the function name is " + "valid (%s) and symbolization is enabled.\n", FuncName.c_str()); +- exit(1); ++ return 1; + } + + bool TracePC::ObservedFocusFunction() { +diff --git a/tools/fuzzing/libfuzzer/FuzzerTracePC.h b/tools/fuzzing/libfuzzer/FuzzerTracePC.h +index 501f3b544971..b46ebb909dbf 100644 +--- a/tools/fuzzing/libfuzzer/FuzzerTracePC.h ++++ b/tools/fuzzing/libfuzzer/FuzzerTracePC.h +@@ -116,7 +116,7 @@ class TracePC { + CB(PC); + } + +- void SetFocusFunction(const std::string &FuncName); ++ int SetFocusFunction(const std::string &FuncName); + bool ObservedFocusFunction(); + + struct PCTableEntry { diff --git a/tools/fuzzing/messagemanager/MessageManagerFuzzer.cpp b/tools/fuzzing/messagemanager/MessageManagerFuzzer.cpp new file mode 100644 index 0000000000..6a4475641f --- /dev/null +++ b/tools/fuzzing/messagemanager/MessageManagerFuzzer.cpp @@ -0,0 +1,327 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <climits> +#include <cmath> +#include "FuzzingTraits.h" +#include "jsapi.h" +#include "jsfriendapi.h" +#include "js/CharacterEncoding.h" +#include "js/Exception.h" +#include "js/PropertyAndElement.h" // JS_Enumerate, JS_GetProperty, JS_GetPropertyById, JS_SetProperty, JS_SetPropertyById +#include "prenv.h" +#include "MessageManagerFuzzer.h" +#include "mozilla/ErrorResult.h" +#include "nsComponentManagerUtils.h" +#include "nsDebug.h" +#include "nsError.h" +#include "nsFrameMessageManager.h" +#include "nsJSUtils.h" +#include "nsXULAppAPI.h" +#include "nsNetCID.h" +#include "nsString.h" +#include "nsUnicharUtils.h" +#include "nsIFile.h" +#include "nsIFileStreams.h" +#include "nsILineInputStream.h" +#include "nsLocalFile.h" +#include "nsTArray.h" + +#ifdef IsLoggingEnabled +// This is defined in the Windows SDK urlmon.h +# undef IsLoggingEnabled +#endif + +#define MESSAGEMANAGER_FUZZER_DEFAULT_MUTATION_PROBABILITY 2 +#define MSGMGR_FUZZER_LOG(fmt, args...) \ + if (MessageManagerFuzzer::IsLoggingEnabled()) { \ + printf_stderr("[MessageManagerFuzzer] " fmt "\n", ##args); \ + } + +namespace mozilla { +namespace dom { + +using namespace fuzzing; +using namespace ipc; + +/* static */ +void MessageManagerFuzzer::ReadFile(const char* path, + nsTArray<nsCString>& aArray) { + nsCOMPtr<nsIFile> file; + nsresult rv = + NS_NewLocalFile(NS_ConvertUTF8toUTF16(path), true, getter_AddRefs(file)); + NS_ENSURE_SUCCESS_VOID(rv); + + bool exists = false; + rv = file->Exists(&exists); + if (NS_FAILED(rv) || !exists) { + return; + } + + nsCOMPtr<nsIFileInputStream> fileStream( + do_CreateInstance(NS_LOCALFILEINPUTSTREAM_CONTRACTID, &rv)); + NS_ENSURE_SUCCESS_VOID(rv); + + rv = fileStream->Init(file, -1, -1, false); + NS_ENSURE_SUCCESS_VOID(rv); + + nsCOMPtr<nsILineInputStream> lineStream(do_QueryInterface(fileStream, &rv)); + NS_ENSURE_SUCCESS_VOID(rv); + + nsAutoCString line; + bool more = true; + do { + rv = lineStream->ReadLine(line, &more); + NS_ENSURE_SUCCESS_VOID(rv); + aArray.AppendElement(line); + } while (more); +} + +/* static */ +bool MessageManagerFuzzer::IsMessageNameBlacklisted( + const nsAString& aMessageName) { + static bool sFileLoaded = false; + static nsTArray<nsCString> valuesInFile; + + if (!sFileLoaded) { + ReadFile(PR_GetEnv("MESSAGEMANAGER_FUZZER_BLACKLIST"), valuesInFile); + sFileLoaded = true; + } + + if (valuesInFile.Length() == 0) { + return false; + } + + return valuesInFile.Contains(NS_ConvertUTF16toUTF8(aMessageName).get()); +} + +/* static */ +nsCString MessageManagerFuzzer::GetFuzzValueFromFile() { + static bool sFileLoaded = false; + static nsTArray<nsCString> valuesInFile; + + if (!sFileLoaded) { + ReadFile(PR_GetEnv("MESSAGEMANAGER_FUZZER_STRINGSFILE"), valuesInFile); + sFileLoaded = true; + } + + // If something goes wrong with importing the file we return an empty string. + if (valuesInFile.Length() == 0) { + return nsCString(); + } + + unsigned randIdx = RandomIntegerRange<unsigned>(0, valuesInFile.Length()); + return valuesInFile.ElementAt(randIdx); +} + +/* static */ +void MessageManagerFuzzer::MutateObject(JSContext* aCx, + JS::Handle<JS::Value> aValue, + unsigned short int aRecursionCounter) { + JS::Rooted<JSObject*> object(aCx, &aValue.toObject()); + JS::Rooted<JS::IdVector> ids(aCx, JS::IdVector(aCx)); + + if (!JS_Enumerate(aCx, object, &ids)) { + return; + } + + for (size_t i = 0, n = ids.length(); i < n; i++) { + // Retrieve Property name. + nsAutoJSString propName; + if (!propName.init(aCx, ids[i])) { + continue; + } + MSGMGR_FUZZER_LOG("%*s- Property: %s", aRecursionCounter * 4, "", + NS_ConvertUTF16toUTF8(propName).get()); + + // The likelihood when a value gets fuzzed of this object. + if (!FuzzingTraits::Sometimes(DefaultMutationProbability())) { + continue; + } + + // Retrieve Property value. + JS::Rooted<JS::Value> propertyValue(aCx); + JS_GetPropertyById(aCx, object, ids[i], &propertyValue); + + JS::Rooted<JS::Value> newPropValue(aCx); + MutateValue(aCx, propertyValue, &newPropValue, aRecursionCounter); + + JS_SetPropertyById(aCx, object, ids[i], newPropValue); + } +} + +/* static */ +bool MessageManagerFuzzer::MutateValue( + JSContext* aCx, JS::Handle<JS::Value> aValue, + JS::MutableHandle<JS::Value> aOutMutationValue, + unsigned short int aRecursionCounter) { + if (aValue.isInt32()) { + if (FuzzingTraits::Sometimes(DefaultMutationProbability() * 2)) { + aOutMutationValue.set(JS::Int32Value(RandomNumericLimit<int>())); + } else { + aOutMutationValue.set(JS::Int32Value(RandomInteger<int>())); + } + MSGMGR_FUZZER_LOG("%*s! Mutated value of type |int32|: '%d' to '%d'", + aRecursionCounter * 4, "", aValue.toInt32(), + aOutMutationValue.toInt32()); + return true; + } + + if (aValue.isDouble()) { + aOutMutationValue.set(JS::DoubleValue(RandomFloatingPoint<double>())); + MSGMGR_FUZZER_LOG("%*s! Mutated value of type |double|: '%f' to '%f'", + aRecursionCounter * 4, "", aValue.toDouble(), + aOutMutationValue.toDouble()); + return true; + } + + if (aValue.isBoolean()) { + aOutMutationValue.set(JS::BooleanValue(bool(RandomIntegerRange(0, 2)))); + MSGMGR_FUZZER_LOG("%*s! Mutated value of type |boolean|: '%d' to '%d'", + aRecursionCounter * 4, "", aValue.toBoolean(), + aOutMutationValue.toBoolean()); + return true; + } + + if (aValue.isString()) { + nsCString x = GetFuzzValueFromFile(); + if (x.IsEmpty()) { + return false; + } + JSString* str = JS_NewStringCopyZ(aCx, x.get()); + aOutMutationValue.set(JS::StringValue(str)); + JS::Rooted<JSString*> rootedValue(aCx, aValue.toString()); + JS::UniqueChars valueChars = JS_EncodeStringToUTF8(aCx, rootedValue); + MSGMGR_FUZZER_LOG("%*s! Mutated value of type |string|: '%s' to '%s'", + aRecursionCounter * 4, "", valueChars.get(), x.get()); + return true; + } + + if (aValue.isObject()) { + aRecursionCounter++; + MSGMGR_FUZZER_LOG("%*s<Enumerating found object>", aRecursionCounter * 4, + ""); + MutateObject(aCx, aValue, aRecursionCounter); + aOutMutationValue.set(aValue); + return true; + } + + return false; +} + +/* static */ +bool MessageManagerFuzzer::Mutate(JSContext* aCx, const nsAString& aMessageName, + ipc::StructuredCloneData* aData, + const JS::Value& aTransfer) { + MSGMGR_FUZZER_LOG("Message: %s in process: %d", + NS_ConvertUTF16toUTF8(aMessageName).get(), + XRE_GetProcessType()); + + unsigned short int aRecursionCounter = 0; + ErrorResult rv; + JS::Rooted<JS::Value> t(aCx, aTransfer); + + /* Read original StructuredCloneData. */ + JS::Rooted<JS::Value> scdContent(aCx); + aData->Read(aCx, &scdContent, rv); + if (NS_WARN_IF(rv.Failed())) { + rv.SuppressException(); + JS_ClearPendingException(aCx); + return false; + } + + JS::Rooted<JS::Value> scdMutationContent(aCx); + bool isMutated = + MutateValue(aCx, scdContent, &scdMutationContent, aRecursionCounter); + + /* Write mutated StructuredCloneData. */ + ipc::StructuredCloneData mutatedStructuredCloneData; + mutatedStructuredCloneData.Write(aCx, scdMutationContent, t, + JS::CloneDataPolicy(), rv); + if (NS_WARN_IF(rv.Failed())) { + rv.SuppressException(); + JS_ClearPendingException(aCx); + return false; + } + + // See: https://bugzilla.mozilla.org/show_bug.cgi?id=1346040 + aData->Copy(mutatedStructuredCloneData); + + /* Mutated and successfully written to StructuredCloneData object. */ + if (isMutated) { + JS::Rooted<JSString*> str(aCx, JS_ValueToSource(aCx, scdMutationContent)); + JS::UniqueChars strChars = JS_EncodeStringToUTF8(aCx, str); + MSGMGR_FUZZER_LOG("Mutated '%s' Message: %s", + NS_ConvertUTF16toUTF8(aMessageName).get(), + strChars.get()); + } + + return true; +} + +/* static */ +unsigned int MessageManagerFuzzer::DefaultMutationProbability() { + static unsigned long sPropValue = + MESSAGEMANAGER_FUZZER_DEFAULT_MUTATION_PROBABILITY; + static bool sInitialized = false; + + if (sInitialized) { + return sPropValue; + } + sInitialized = true; + + // Defines the likelihood of fuzzing a message. + const char* probability = + PR_GetEnv("MESSAGEMANAGER_FUZZER_MUTATION_PROBABILITY"); + if (probability) { + long n = std::strtol(probability, nullptr, 10); + if (n != 0) { + sPropValue = n; + return sPropValue; + } + } + + return sPropValue; +} + +/* static */ +bool MessageManagerFuzzer::IsLoggingEnabled() { + static bool sInitialized = false; + static bool sIsLoggingEnabled = false; + + if (!sInitialized) { + sIsLoggingEnabled = !!PR_GetEnv("MESSAGEMANAGER_FUZZER_ENABLE_LOGGING"); + sInitialized = true; + } + + return sIsLoggingEnabled; +} + +/* static */ +bool MessageManagerFuzzer::IsEnabled() { + return !!PR_GetEnv("MESSAGEMANAGER_FUZZER_ENABLE") && XRE_IsContentProcess(); +} + +/* static */ +void MessageManagerFuzzer::TryMutate(JSContext* aCx, + const nsAString& aMessageName, + ipc::StructuredCloneData* aData, + const JS::Value& aTransfer) { + if (!IsEnabled()) { + return; + } + + if (IsMessageNameBlacklisted(aMessageName)) { + MSGMGR_FUZZER_LOG("Blacklisted message: %s", + NS_ConvertUTF16toUTF8(aMessageName).get()); + return; + } + + Mutate(aCx, aMessageName, aData, aTransfer); +} + +} // namespace dom +} // namespace mozilla diff --git a/tools/fuzzing/messagemanager/MessageManagerFuzzer.h b/tools/fuzzing/messagemanager/MessageManagerFuzzer.h new file mode 100644 index 0000000000..fd7835055a --- /dev/null +++ b/tools/fuzzing/messagemanager/MessageManagerFuzzer.h @@ -0,0 +1,63 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_MessageManagerFuzzer_h__ +#define mozilla_dom_MessageManagerFuzzer_h__ + +#include "jspubtd.h" +#include "nsAString.h" +#include "nsTArray.h" + +namespace mozilla { +namespace dom { + +namespace ipc { +class StructuredCloneData; +} + +/* +Exposed environment variables: +MESSAGEMANAGER_FUZZER_ENABLE=1 +MESSAGEMANAGER_FUZZER_ENABLE_LOGGING=1 (optional) +MESSAGEMANAGER_FUZZER_MUTATION_PROBABILITY=2 (optional) +MESSAGEMANAGER_FUZZER_STRINGSFILE=<path> (optional) +MESSAGEMANAGER_FUZZER_BLACKLIST=<path> (optional) +*/ + +#ifdef IsLoggingEnabled +// This is defined in the Windows SDK urlmon.h +# undef IsLoggingEnabled +#endif + +class MessageManagerFuzzer { + public: + static void TryMutate(JSContext* aCx, const nsAString& aMessageName, + ipc::StructuredCloneData* aData, + const JS::Value& aTransfer); + + private: + static void ReadFile(const char* path, nsTArray<nsCString>& aArray); + static nsCString GetFuzzValueFromFile(); + static bool IsMessageNameBlacklisted(const nsAString& aMessageName); + static bool Mutate(JSContext* aCx, const nsAString& aMessageName, + ipc::StructuredCloneData* aData, + const JS::Value& aTransfer); + static void Mutate(JSContext* aCx, JS::Rooted<JS::Value>& aMutation); + static void MutateObject(JSContext* aCx, JS::Handle<JS::Value> aValue, + unsigned short int aRecursionCounter); + static bool MutateValue(JSContext* aCx, JS::Handle<JS::Value> aValue, + JS::MutableHandle<JS::Value> aOutMutationValue, + unsigned short int aRecursionCounter); + static unsigned int DefaultMutationProbability(); + static nsAutoString ReadJSON(JSContext* aCx, const JS::Value& aJSON); + static bool IsEnabled(); + static bool IsLoggingEnabled(); +}; + +} // namespace dom +} // namespace mozilla + +#endif diff --git a/tools/fuzzing/messagemanager/moz.build b/tools/fuzzing/messagemanager/moz.build new file mode 100644 index 0000000000..9fc49d7b73 --- /dev/null +++ b/tools/fuzzing/messagemanager/moz.build @@ -0,0 +1,11 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +SOURCES += ["MessageManagerFuzzer.cpp"] + +EXPORTS += ["MessageManagerFuzzer.h"] + +FINAL_LIBRARY = "xul" diff --git a/tools/fuzzing/moz.build b/tools/fuzzing/moz.build new file mode 100644 index 0000000000..37dc85ddd1 --- /dev/null +++ b/tools/fuzzing/moz.build @@ -0,0 +1,33 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +DIRS += [ + "interface", + "registry", +] + +if not CONFIG["JS_STANDALONE"]: + DIRS += [ + "common", + "messagemanager", + "shmem", + ] + + if CONFIG["FUZZING_SNAPSHOT"]: + DIRS += [ + "ipc", + "nyx", + ] + + if CONFIG["LIBFUZZER"]: + DIRS += [ + "rust", + ] + +if CONFIG["LIBFUZZER"]: + DIRS += [ + "libfuzzer", + ] diff --git a/tools/fuzzing/nyx/Nyx.cpp b/tools/fuzzing/nyx/Nyx.cpp new file mode 100644 index 0000000000..5c48d3342b --- /dev/null +++ b/tools/fuzzing/nyx/Nyx.cpp @@ -0,0 +1,206 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/Assertions.h" +#include "mozilla/Unused.h" +#include "mozilla/Vector.h" +#include "mozilla/fuzzing/Nyx.h" +#include "prinrval.h" +#include "prthread.h" + +#include <algorithm> +#include <fstream> + +#include <unistd.h> + +namespace mozilla { +namespace fuzzing { + +Nyx::Nyx() {} + +// static +Nyx& Nyx::instance() { + static Nyx nyx; + return nyx; +} + +extern "C" { +MOZ_EXPORT __attribute__((weak)) void nyx_start(void); +MOZ_EXPORT __attribute__((weak)) uint32_t nyx_get_next_fuzz_data(void*, + uint32_t); +MOZ_EXPORT __attribute__((weak)) void nyx_release(uint32_t); +MOZ_EXPORT __attribute__((weak)) void nyx_handle_event(const char*, const char*, + int, const char*); +MOZ_EXPORT __attribute__((weak)) void nyx_puts(const char*); +} + +/* + * In this macro, we must avoid calling MOZ_CRASH and friends, as these + * calls will redirect to the Nyx event handler routines. If the library + * is not properly preloaded, we will crash in the process. Instead, emit + * a descriptive error and then force a crash that won't be redirected. + */ +#define NYX_CHECK_API(func) \ + if (!func) { \ + fprintf( \ + stderr, \ + "Error: Nyx library must be in LD_PRELOAD. Missing function \"%s\"\n", \ + #func); \ + MOZ_REALLY_CRASH(__LINE__); \ + } + +void Nyx::start(void) { + MOZ_RELEASE_ASSERT(!mInited); + mInited = true; + + // Check if we are in replay mode. + char* testFilePtr = getenv("MOZ_FUZZ_TESTFILE"); + if (testFilePtr) { + mReplayMode = true; + + MOZ_FUZZING_NYX_PRINT("[Replay Mode] Reading data file...\n"); + + std::string testFile(testFilePtr); + std::ifstream is; + is.open(testFile, std::ios::binary); + + uint64_t chksum, num_ops, num_data, op_offset, data_offset; + + // If only C++ supported streaming operators on binary files... + is.read(reinterpret_cast<char*>(&chksum), sizeof(uint64_t)); + is.read(reinterpret_cast<char*>(&num_ops), sizeof(uint64_t)); + is.read(reinterpret_cast<char*>(&num_data), sizeof(uint64_t)); + is.read(reinterpret_cast<char*>(&op_offset), sizeof(uint64_t)); + is.read(reinterpret_cast<char*>(&data_offset), sizeof(uint64_t)); + + if (!is.good()) { + MOZ_FUZZING_NYX_PRINT("[Replay Mode] Error reading input file.\n"); + _exit(1); + } + + is.seekg(data_offset); + + // The data chunks we receive through Nyx are stored in the data + // section of the testfile as chunks prefixed with a 16-bit data + // length. We read all chunks and store them away to simulate how + // we originally received the data via Nyx. + + while (is.good()) { + uint16_t pktsize; + is.read(reinterpret_cast<char*>(&pktsize), sizeof(uint16_t)); + + if (!is.good()) { + break; + } + + auto buffer = new Vector<uint8_t>(); + + mozilla::Unused << buffer->initLengthUninitialized(pktsize); + is.read(reinterpret_cast<char*>(buffer->begin()), buffer->length()); + + MOZ_FUZZING_NYX_PRINTF("[Replay Mode] Read data packet of size %zu\n", + buffer->length()); + + mReplayBuffers.push_back(buffer); + } + + if (!mReplayBuffers.size()) { + MOZ_FUZZING_NYX_PRINT("[Replay Mode] Error: No buffers read.\n"); + _exit(1); + } + + is.close(); + + if (!!getenv("MOZ_FUZZ_WAIT_BEFORE_REPLAY")) { + // This can be useful in some cases to reproduce intermittent issues. + PR_Sleep(PR_MillisecondsToInterval(5000)); + } + + return; + } + + NYX_CHECK_API(nyx_start); + NYX_CHECK_API(nyx_get_next_fuzz_data); + NYX_CHECK_API(nyx_release); + NYX_CHECK_API(nyx_handle_event); + NYX_CHECK_API(nyx_puts); + + nyx_start(); +} + +bool Nyx::started(void) { return mInited; } + +bool Nyx::is_enabled(const char* identifier) { + static char* fuzzer = getenv("NYX_FUZZER"); + if (!fuzzer || strcmp(fuzzer, identifier)) { + return false; + } + return true; +} + +bool Nyx::is_replay() { return mReplayMode; } + +uint32_t Nyx::get_data(uint8_t* data, uint32_t size) { + MOZ_RELEASE_ASSERT(mInited); + + if (mReplayMode) { + if (!mReplayBuffers.size()) { + return 0xFFFFFFFF; + } + + Vector<uint8_t>* buffer = mReplayBuffers.front(); + mReplayBuffers.pop_front(); + + size = std::min(size, (uint32_t)buffer->length()); + memcpy(data, buffer->begin(), size); + + delete buffer; + + return size; + } + + return nyx_get_next_fuzz_data(data, size); +} + +void Nyx::release(uint32_t iterations) { + MOZ_RELEASE_ASSERT(mInited); + + if (mReplayMode) { + MOZ_FUZZING_NYX_PRINT("[Replay Mode] Nyx::release() called.\n"); + + // If we reach this point in replay mode, we are essentially done. + // Let's wait a bit further for things to settle and then exit. + PR_Sleep(PR_MillisecondsToInterval(5000)); + _exit(1); + } + + MOZ_FUZZING_NYX_DEBUG("[DEBUG] Nyx::release() called.\n"); + + nyx_release(iterations); +} + +void Nyx::handle_event(const char* type, const char* file, int line, + const char* reason) { + if (mReplayMode) { + MOZ_FUZZING_NYX_PRINTF( + "[Replay Mode] Nyx::handle_event() called: %s at %s:%d : %s\n", type, + file, line, reason); + return; + } + + if (mInited) { + nyx_handle_event(type, file, line, reason); + } else { + // We can have events such as MOZ_CRASH even before we snapshot. + // Output some useful information to make it clear where it happened. + MOZ_FUZZING_NYX_PRINTF( + "[ERROR] PRE SNAPSHOT Nyx::handle_event() called: %s at %s:%d : %s\n", + type, file, line, reason); + } +} + +} // namespace fuzzing +} // namespace mozilla diff --git a/tools/fuzzing/nyx/Nyx.h b/tools/fuzzing/nyx/Nyx.h new file mode 100644 index 0000000000..e29f5f687a --- /dev/null +++ b/tools/fuzzing/nyx/Nyx.h @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_fuzzing_Nyx_h +#define mozilla_fuzzing_Nyx_h + +#include <stdint.h> +#include <atomic> +#include <list> + +#ifndef NYX_DISALLOW_COPY_AND_ASSIGN +# define NYX_DISALLOW_COPY_AND_ASSIGN(T) \ + T(const T&); \ + void operator=(const T&) +#endif + +namespace mozilla { + +class MallocAllocPolicy; +template <class T, size_t MinInlineCapacity, class AllocPolicy> +class Vector; + +namespace fuzzing { + +class Nyx { + public: + static Nyx& instance(); + + void start(void); + bool started(void); + bool is_enabled(const char* identifier); + bool is_replay(); + uint32_t get_data(uint8_t* data, uint32_t size); + void release(uint32_t iterations = 1); + void handle_event(const char* type, const char* file, int line, + const char* reason); + + private: + std::atomic<bool> mInited; + + std::atomic<bool> mReplayMode; + std::list<Vector<uint8_t, 0, MallocAllocPolicy>*> mReplayBuffers; + + Nyx(); + NYX_DISALLOW_COPY_AND_ASSIGN(Nyx); +}; + +} // namespace fuzzing +} // namespace mozilla + +#endif /* mozilla_fuzzing_Nyx_h */ diff --git a/tools/fuzzing/nyx/NyxWrapper.h b/tools/fuzzing/nyx/NyxWrapper.h new file mode 100644 index 0000000000..0bb6e0fc46 --- /dev/null +++ b/tools/fuzzing/nyx/NyxWrapper.h @@ -0,0 +1,33 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_fuzzing_NyxWrapper_h +#define mozilla_fuzzing_NyxWrapper_h + +#include "mozilla/Types.h" + +/* + * We need this event handler definition both in C and C++ to differentiate + * the various flavors of controlled aborts (e.g. MOZ_DIAGNOSTIC_ASSERT + * vs. MOZ_RELEASE_ASSERT). Hence we can't use the higher level C++ API + * for this and directly redirect to the Nyx event handler in the preloaded + * library. + */ + +#ifdef __cplusplus +extern "C" { +#endif +MOZ_EXPORT __attribute__((weak)) void nyx_handle_event(const char* type, + const char* file, + int line, + const char* reason); + +MOZ_EXPORT __attribute__((weak)) void nyx_puts(const char*); +#ifdef __cplusplus +} +#endif + +#endif /* mozilla_fuzzing_NyxWrapper_h */ diff --git a/tools/fuzzing/nyx/moz.build b/tools/fuzzing/nyx/moz.build new file mode 100644 index 0000000000..74c8ec8515 --- /dev/null +++ b/tools/fuzzing/nyx/moz.build @@ -0,0 +1,16 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +SOURCES += [ + "Nyx.cpp", +] + +EXPORTS.mozilla.fuzzing += [ + "Nyx.h", + "NyxWrapper.h", +] + +FINAL_LIBRARY = "xul" diff --git a/tools/fuzzing/registry/FuzzerRegistry.cpp b/tools/fuzzing/registry/FuzzerRegistry.cpp new file mode 100644 index 0000000000..ed4b0c7fa8 --- /dev/null +++ b/tools/fuzzing/registry/FuzzerRegistry.cpp @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * * This Source Code Form is subject to the terms of the Mozilla Public + * * License, v. 2.0. If a copy of the MPL was not distributed with this + * * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FuzzerRegistry.h" + +namespace mozilla { + +FuzzerRegistry& FuzzerRegistry::getInstance() { + static FuzzerRegistry instance; + return instance; +} + +void FuzzerRegistry::registerModule(std::string moduleName, + FuzzerInitFunc initFunc, + FuzzerTestingFunc testingFunc) { + moduleMap.insert(std::pair<std::string, FuzzerFunctions>( + moduleName, FuzzerFunctions(initFunc, testingFunc))); +} + +FuzzerFunctions FuzzerRegistry::getModuleFunctions(std::string& moduleName) { + return moduleMap[moduleName]; +} + +} // namespace mozilla diff --git a/tools/fuzzing/registry/FuzzerRegistry.h b/tools/fuzzing/registry/FuzzerRegistry.h new file mode 100644 index 0000000000..5976ddc5b6 --- /dev/null +++ b/tools/fuzzing/registry/FuzzerRegistry.h @@ -0,0 +1,44 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * * This Source Code Form is subject to the terms of the Mozilla Public + * * License, v. 2.0. If a copy of the MPL was not distributed with this + * * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _FuzzerRegistry_h__ +#define _FuzzerRegistry_h__ + +#include <cstdint> +#include <map> +#include <string> +#include <utility> + +#include "mozilla/Attributes.h" +#include "mozilla/Types.h" + +typedef int (*FuzzerInitFunc)(int*, char***); +typedef int (*FuzzerTestingFunc)(const uint8_t*, size_t); + +typedef int (*LibFuzzerDriver)(int*, char***, FuzzerTestingFunc); + +namespace mozilla { + +typedef std::pair<FuzzerInitFunc, FuzzerTestingFunc> FuzzerFunctions; + +class FuzzerRegistry { + public: + MOZ_EXPORT static FuzzerRegistry& getInstance(); + MOZ_EXPORT void registerModule(std::string moduleName, + FuzzerInitFunc initFunc, + FuzzerTestingFunc testingFunc); + MOZ_EXPORT FuzzerFunctions getModuleFunctions(std::string& moduleName); + + FuzzerRegistry(FuzzerRegistry const&) = delete; + void operator=(FuzzerRegistry const&) = delete; + + private: + FuzzerRegistry(){}; + std::map<std::string, FuzzerFunctions> moduleMap; +}; + +} // namespace mozilla + +#endif // _FuzzerRegistry_h__ diff --git a/tools/fuzzing/registry/moz.build b/tools/fuzzing/registry/moz.build new file mode 100644 index 0000000000..4aa005a56e --- /dev/null +++ b/tools/fuzzing/registry/moz.build @@ -0,0 +1,20 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Library("fuzzer-registry") + +SOURCES += [ + "FuzzerRegistry.cpp", +] + +EXPORTS += [ + "FuzzerRegistry.h", +] + +if CONFIG["JS_STANDALONE"]: + FINAL_LIBRARY = "js" +else: + FINAL_LIBRARY = "xul" diff --git a/tools/fuzzing/rust/Cargo.toml b/tools/fuzzing/rust/Cargo.toml new file mode 100644 index 0000000000..de0830e976 --- /dev/null +++ b/tools/fuzzing/rust/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "gecko-fuzz-targets" +version = "0.1.0" +authors = ["fuzzing@mozilla.com"] + +[dependencies] +libc = "0.2" +tempfile = "3" +lazy_static = "1.4.0" +rkv = { version = "0.18", features = ["with-fuzzer-no-link"] } +lmdb-rkv = { version = "0.14", features = ["with-fuzzer-no-link"] } diff --git a/tools/fuzzing/rust/RustFuzzingTargets.cpp b/tools/fuzzing/rust/RustFuzzingTargets.cpp new file mode 100644 index 0000000000..7b4dd6c86c --- /dev/null +++ b/tools/fuzzing/rust/RustFuzzingTargets.cpp @@ -0,0 +1,15 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FuzzingInterface.h" +#include "RustFuzzingTargets.h" + +int FuzzingInitDummy(int* argc, char*** argv) { return 0; } + +MOZ_FUZZING_INTERFACE_RAW(FuzzingInitDummy, fuzz_rkv_db_file, RkvDbFile); +MOZ_FUZZING_INTERFACE_RAW(FuzzingInitDummy, fuzz_rkv_db_name, RkvDbName); +MOZ_FUZZING_INTERFACE_RAW(FuzzingInitDummy, fuzz_rkv_key_write, RkvKeyWrite); +MOZ_FUZZING_INTERFACE_RAW(FuzzingInitDummy, fuzz_rkv_val_write, RkvValWrite); +MOZ_FUZZING_INTERFACE_RAW(FuzzingInitDummy, fuzz_rkv_calls, RkvCalls); diff --git a/tools/fuzzing/rust/RustFuzzingTargets.h b/tools/fuzzing/rust/RustFuzzingTargets.h new file mode 100644 index 0000000000..a9c1439598 --- /dev/null +++ b/tools/fuzzing/rust/RustFuzzingTargets.h @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Interface definitions for fuzzing rust modules + */ + +#ifndef RustFuzzingTargets_h__ +#define RustFuzzingTargets_h__ + +#include <stddef.h> +#include <stdint.h> + +extern "C" { + +int fuzz_rkv_db_file(const uint8_t* raw_data, size_t size); +int fuzz_rkv_db_name(const uint8_t* raw_data, size_t size); +int fuzz_rkv_key_write(const uint8_t* raw_data, size_t size); +int fuzz_rkv_val_write(const uint8_t* raw_data, size_t size); +int fuzz_rkv_calls(const uint8_t* raw_data, size_t size); + +} // extern "C" + +#endif // RustFuzzingTargets_h__ diff --git a/tools/fuzzing/rust/moz.build b/tools/fuzzing/rust/moz.build new file mode 100644 index 0000000000..aa6aaa29dc --- /dev/null +++ b/tools/fuzzing/rust/moz.build @@ -0,0 +1,16 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Library("fuzzer-rust-targets") + +SOURCES += [ + "RustFuzzingTargets.cpp", +] + +# Add libFuzzer configuration directives +include("/tools/fuzzing/libfuzzer-config.mozbuild") + +FINAL_LIBRARY = "xul-gtest" diff --git a/tools/fuzzing/rust/src/lib.rs b/tools/fuzzing/rust/src/lib.rs new file mode 100644 index 0000000000..25c9195fb8 --- /dev/null +++ b/tools/fuzzing/rust/src/lib.rs @@ -0,0 +1,341 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#[macro_use] +extern crate lazy_static; +extern crate libc; +extern crate lmdb; +extern crate rkv; +extern crate tempfile; + +use rkv::backend::{ + BackendEnvironmentBuilder, SafeMode, SafeModeDatabase, SafeModeEnvironment, + SafeModeRoTransaction, SafeModeRwTransaction, +}; +use std::fs; +use std::fs::File; +use std::io::Write; +use std::iter; +use std::path::Path; +use std::sync::Arc; +use std::thread; +use tempfile::Builder; + +type Rkv = rkv::Rkv<SafeModeEnvironment>; +type SingleStore = rkv::SingleStore<SafeModeDatabase>; + +fn eat_lmdb_err<T>(value: Result<T, rkv::StoreError>) -> Result<Option<T>, rkv::StoreError> { + match value { + Ok(value) => Ok(Some(value)), + Err(rkv::StoreError::LmdbError(_)) => Ok(None), + Err(err) => { + println!("Not a crash, but an error outside LMDB: {}", err); + println!("A refined fuzzing test, or changes to RKV, might be required."); + Err(err) + } + } +} + +fn panic_with_err(err: rkv::StoreError) { + println!("Got error: {}", err); + Result::Err(err).unwrap() +} + +#[no_mangle] +pub extern "C" fn fuzz_rkv_db_file(raw_data: *const u8, size: libc::size_t) -> libc::c_int { + let data = unsafe { std::slice::from_raw_parts(raw_data as *const u8, size as usize) }; + + // First 8192 bytes are for the lock file. + if data.len() < 8192 { + return 0; + } + let (lock, db) = data.split_at(8192); + let mut lock_file = File::create("data.lock").unwrap(); + lock_file.write_all(lock).unwrap(); + let mut db_file = File::create("data.mdb").unwrap(); + db_file.write_all(db).unwrap(); + + let env = Rkv::with_capacity::<SafeMode>(Path::new("."), 2).unwrap(); + let store = env + .open_single("test", rkv::StoreOptions::create()) + .unwrap(); + + let reader = env.read().unwrap(); + eat_lmdb_err(store.get(&reader, &[0])).unwrap(); + + 0 +} + +#[no_mangle] +pub extern "C" fn fuzz_rkv_db_name(raw_data: *const u8, size: libc::size_t) -> libc::c_int { + let data = unsafe { std::slice::from_raw_parts(raw_data as *const u8, size as usize) }; + + let root = Builder::new().prefix("fuzz_rkv_db_name").tempdir().unwrap(); + fs::create_dir_all(root.path()).unwrap(); + + let env = Rkv::new::<SafeMode>(root.path()).unwrap(); + let name = String::from_utf8_lossy(data); + println!("Checking string: '{:?}'", name); + // Some strings are invalid database names, and are handled as store errors. + // Ignore those errors, but not others. + let store = eat_lmdb_err(env.open_single(name.as_ref(), rkv::StoreOptions::create())).unwrap(); + + if let Some(store) = store { + let reader = env.read().unwrap(); + eat_lmdb_err(store.get(&reader, &[0])).unwrap(); + }; + + 0 +} + +#[no_mangle] +pub extern "C" fn fuzz_rkv_key_write(raw_data: *const u8, size: libc::size_t) -> libc::c_int { + let data = unsafe { std::slice::from_raw_parts(raw_data as *const u8, size as usize) }; + + let root = Builder::new() + .prefix("fuzz_rkv_key_write") + .tempdir() + .unwrap(); + fs::create_dir_all(root.path()).unwrap(); + + let env = Rkv::new::<SafeMode>(root.path()).unwrap(); + let store = env + .open_single("test", rkv::StoreOptions::create()) + .unwrap(); + + let mut writer = env.write().unwrap(); + // Some data are invalid values, and are handled as store errors. + // Ignore those errors, but not others. + eat_lmdb_err(store.put(&mut writer, data, &rkv::Value::Str("val"))).unwrap(); + + 0 +} + +#[no_mangle] +pub extern "C" fn fuzz_rkv_val_write(raw_data: *const u8, size: libc::size_t) -> libc::c_int { + let data = unsafe { std::slice::from_raw_parts(raw_data as *const u8, size as usize) }; + + let root = Builder::new() + .prefix("fuzz_rkv_val_write") + .tempdir() + .unwrap(); + fs::create_dir_all(root.path()).unwrap(); + + let env = Rkv::new::<SafeMode>(root.path()).unwrap(); + let store = env + .open_single("test", rkv::StoreOptions::create()) + .unwrap(); + + let mut writer = env.write().unwrap(); + let string = String::from_utf8_lossy(data); + let value = rkv::Value::Str(&string); + store.put(&mut writer, "key", &value).unwrap(); + + 0 +} + +lazy_static! { + static ref STATIC_DATA: Vec<String> = { + let sizes = vec![4, 16, 128, 512, 1024]; + let mut data = Vec::new(); + + for (i, s) in sizes.into_iter().enumerate() { + let bytes = iter::repeat('a' as u8 + i as u8).take(s).collect(); + data.push(String::from_utf8(bytes).unwrap()); + } + + data + }; +} + +#[no_mangle] +pub extern "C" fn fuzz_rkv_calls(raw_data: *const u8, size: libc::size_t) -> libc::c_int { + let data = unsafe { std::slice::from_raw_parts(raw_data as *const u8, size as usize) }; + let mut fuzz = data.iter().copied(); + + fn maybe_do<I: Iterator<Item = u8>>(fuzz: &mut I, f: impl FnOnce(&mut I) -> ()) -> Option<()> { + match fuzz.next().map(|byte| byte % 2) { + Some(0) => Some(f(fuzz)), + _ => None, + } + } + + fn maybe_abort<I: Iterator<Item = u8>>( + fuzz: &mut I, + read: rkv::Reader<SafeModeRoTransaction>, + ) -> Result<(), rkv::StoreError> { + match fuzz.next().map(|byte| byte % 2) { + Some(0) => Ok(read.abort()), + _ => Ok(()), + } + } + + fn maybe_commit<I: Iterator<Item = u8>>( + fuzz: &mut I, + write: rkv::Writer<SafeModeRwTransaction>, + ) -> Result<(), rkv::StoreError> { + match fuzz.next().map(|byte| byte % 3) { + Some(0) => write.commit(), + Some(1) => Ok(write.abort()), + _ => Ok(()), + } + } + + fn get_static_data<'a, I: Iterator<Item = u8>>(fuzz: &mut I) -> Option<&'a String> { + fuzz.next().map(|byte| { + let data = &*STATIC_DATA; + let n = byte as usize; + data.get(n % data.len()).unwrap() + }) + } + + fn get_fuzz_data<I: Iterator<Item = u8> + Clone>( + fuzz: &mut I, + max_len: usize, + ) -> Option<Vec<u8>> { + fuzz.next().map(|byte| { + let n = byte as usize; + fuzz.clone().take((n * n) % max_len).collect() + }) + } + + fn get_any_data<I: Iterator<Item = u8> + Clone>( + fuzz: &mut I, + max_len: usize, + ) -> Option<Vec<u8>> { + match fuzz.next().map(|byte| byte % 2) { + Some(0) => get_static_data(fuzz).map(|v| v.clone().into_bytes()), + Some(1) => get_fuzz_data(fuzz, max_len), + _ => None, + } + } + + fn store_put<I: Iterator<Item = u8> + Clone>(fuzz: &mut I, env: &Rkv, store: &SingleStore) { + let key = match get_any_data(fuzz, 1024) { + Some(key) => key, + None => return, + }; + let value = match get_any_data(fuzz, std::usize::MAX) { + Some(value) => value, + None => return, + }; + + let mut writer = env.write().unwrap(); + let mut full = false; + + match store.put(&mut writer, key, &rkv::Value::Blob(&value)) { + Ok(_) => {} + Err(rkv::StoreError::LmdbError(lmdb::Error::BadValSize)) => {} + Err(rkv::StoreError::LmdbError(lmdb::Error::MapFull)) => full = true, + Err(err) => panic_with_err(err), + }; + + if full { + writer.abort(); + store_resize(fuzz, env); + } else { + maybe_commit(fuzz, writer).unwrap(); + } + } + + fn store_get<I: Iterator<Item = u8> + Clone>(fuzz: &mut I, env: &Rkv, store: &SingleStore) { + let key = match get_any_data(fuzz, 1024) { + Some(key) => key, + None => return, + }; + + let mut reader = match env.read() { + Ok(reader) => reader, + Err(rkv::StoreError::LmdbError(lmdb::Error::ReadersFull)) => return, + Err(err) => return panic_with_err(err), + }; + + match store.get(&mut reader, key) { + Ok(_) => {} + Err(rkv::StoreError::LmdbError(lmdb::Error::BadValSize)) => {} + Err(err) => panic_with_err(err), + }; + + maybe_abort(fuzz, reader).unwrap(); + } + + fn store_delete<I: Iterator<Item = u8> + Clone>(fuzz: &mut I, env: &Rkv, store: &SingleStore) { + let key = match get_any_data(fuzz, 1024) { + Some(key) => key, + None => return, + }; + + let mut writer = env.write().unwrap(); + + match store.delete(&mut writer, key) { + Ok(_) => {} + Err(rkv::StoreError::LmdbError(lmdb::Error::BadValSize)) => {} + Err(rkv::StoreError::LmdbError(lmdb::Error::NotFound)) => {} + Err(err) => panic_with_err(err), + }; + + maybe_commit(fuzz, writer).unwrap(); + } + + fn store_resize<I: Iterator<Item = u8>>(fuzz: &mut I, env: &Rkv) { + let n = fuzz.next().unwrap_or(1) as usize; + env.set_map_size(1_048_576 * (n % 100)).unwrap() // 1,048,576 bytes, i.e. 1MiB. + } + + let root = Builder::new().prefix("fuzz_rkv_calls").tempdir().unwrap(); + fs::create_dir_all(root.path()).unwrap(); + + let mut builder: SafeMode = Rkv::environment_builder(); + builder.set_max_dbs(1); // need at least one db + + maybe_do(&mut fuzz, |fuzz| { + let n = fuzz.next().unwrap_or(126) as u32; // default + builder.set_max_readers(1 + n); + }); + maybe_do(&mut fuzz, |fuzz| { + let n = fuzz.next().unwrap_or(0) as u32; + builder.set_max_dbs(1 + n); + }); + maybe_do(&mut fuzz, |fuzz| { + let n = fuzz.next().unwrap_or(1) as usize; + builder.set_map_size(1_048_576 * (n % 100)); // 1,048,576 bytes, i.e. 1MiB. + }); + + let env = Rkv::from_builder(root.path(), builder).unwrap(); + let store = env + .open_single("test", rkv::StoreOptions::create()) + .unwrap(); + + let shared_env = Arc::new(env); + let shared_store = Arc::new(store); + + let use_threads = fuzz.next().map(|byte| byte % 10 == 0).unwrap_or(false); + let max_threads = if use_threads { 16 } else { 1 }; + let num_threads = fuzz.next().unwrap_or(0) as usize % max_threads + 1; + let chunk_size = fuzz.len() / num_threads; + + let threads = (0..num_threads).map(|_| { + let env = shared_env.clone(); + let store = shared_store.clone(); + + let chunk: Vec<_> = fuzz.by_ref().take(chunk_size).collect(); + let mut fuzz = chunk.into_iter(); + + thread::spawn(move || loop { + match fuzz.next().map(|byte| byte % 4) { + Some(0) => store_put(&mut fuzz, &env, &store), + Some(1) => store_get(&mut fuzz, &env, &store), + Some(2) => store_delete(&mut fuzz, &env, &store), + Some(3) => store_resize(&mut fuzz, &env), + _ => break, + } + }) + }); + + for handle in threads { + handle.join().unwrap() + } + + 0 +} diff --git a/tools/fuzzing/shmem/SharedMemoryFuzzer.cpp b/tools/fuzzing/shmem/SharedMemoryFuzzer.cpp new file mode 100644 index 0000000000..49a79fa975 --- /dev/null +++ b/tools/fuzzing/shmem/SharedMemoryFuzzer.cpp @@ -0,0 +1,122 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FuzzingMutate.h" +#include "FuzzingTraits.h" +#include "nsDebug.h" +#include "prenv.h" +#include "SharedMemoryFuzzer.h" + +#define SHMEM_FUZZER_DEFAULT_MUTATION_PROBABILITY 2 +#define SHMEM_FUZZER_DEFAULT_MUTATION_FACTOR 500 +#define SHMEM_FUZZER_LOG(fmt, args...) \ + if (SharedMemoryFuzzer::IsLoggingEnabled()) { \ + printf_stderr("[SharedMemoryFuzzer] " fmt "\n", ##args); \ + } + +namespace mozilla { +namespace ipc { + +using namespace fuzzing; + +/* static */ +bool SharedMemoryFuzzer::IsLoggingEnabled() { + static bool sInitialized = false; + static bool sIsLoggingEnabled = false; + + if (!sInitialized) { + sIsLoggingEnabled = !!PR_GetEnv("SHMEM_FUZZER_ENABLE_LOGGING"); + sInitialized = true; + } + return sIsLoggingEnabled; +} + +/* static */ +bool SharedMemoryFuzzer::IsEnabled() { + static bool sInitialized = false; + static bool sIsFuzzerEnabled = false; + + if (!sInitialized) { + sIsFuzzerEnabled = !!PR_GetEnv("SHMEM_FUZZER_ENABLE"); + } + return sIsFuzzerEnabled; +} + +/* static */ +uint64_t SharedMemoryFuzzer::MutationProbability() { + static uint64_t sPropValue = SHMEM_FUZZER_DEFAULT_MUTATION_PROBABILITY; + static bool sInitialized = false; + + if (sInitialized) { + return sPropValue; + } + sInitialized = true; + + const char* probability = PR_GetEnv("SHMEM_FUZZER_MUTATION_PROBABILITY"); + if (probability) { + long n = std::strtol(probability, nullptr, 10); + if (n != 0) { + sPropValue = n; + return sPropValue; + } + } + return sPropValue; +} + +/* static */ +uint64_t SharedMemoryFuzzer::MutationFactor() { + static uint64_t sPropValue = SHMEM_FUZZER_DEFAULT_MUTATION_FACTOR; + static bool sInitialized = false; + + if (sInitialized) { + return sPropValue; + } + sInitialized = true; + + const char* factor = PR_GetEnv("SHMEM_FUZZER_MUTATION_FACTOR"); + if (factor) { + long n = strtol(factor, nullptr, 10); + if (n != 0) { + sPropValue = n; + return sPropValue; + } + } + return sPropValue; +} + +/* static */ +void* SharedMemoryFuzzer::MutateSharedMemory(void* aMemory, size_t aSize) { + if (!IsEnabled()) { + return aMemory; + } + + if (aSize == 0) { + /* Shmem opened from foreign handle. */ + SHMEM_FUZZER_LOG("shmem is of size 0."); + return aMemory; + } + + if (!aMemory) { + /* Memory space is not mapped. */ + SHMEM_FUZZER_LOG("shmem memory space is not mapped."); + return aMemory; + } + + // The likelihood when a value gets fuzzed of this object. + if (!FuzzingTraits::Sometimes(MutationProbability())) { + return aMemory; + } + + const size_t max = FuzzingTraits::Frequency(aSize, MutationFactor()); + SHMEM_FUZZER_LOG("shmem of size: %zu / mutations: %zu", aSize, max); + for (size_t i = 0; i < max; i++) { + FuzzingMutate::ChangeBit((uint8_t*)aMemory, aSize); + } + return aMemory; +} + +} // namespace ipc +} // namespace mozilla diff --git a/tools/fuzzing/shmem/SharedMemoryFuzzer.h b/tools/fuzzing/shmem/SharedMemoryFuzzer.h new file mode 100644 index 0000000000..bd862edf6a --- /dev/null +++ b/tools/fuzzing/shmem/SharedMemoryFuzzer.h @@ -0,0 +1,38 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_dom_SharedMemoryFuzzer_h +#define mozilla_dom_SharedMemoryFuzzer_h + +#include <stddef.h> +#include <stdint.h> + +namespace mozilla { +namespace ipc { + +/* + * Exposed environment variables: + * SHMEM_FUZZER_ENABLE=1 + * SHMEM_FUZZER_ENABLE_LOGGING=1 (optional) + * SHMEM_FUZZER_MUTATION_PROBABILITY=2 (optional) + * SHMEM_FUZZER_MUTATION_FACTOR=500 (optional) + */ + +class SharedMemoryFuzzer { + public: + static void* MutateSharedMemory(void* aMemory, size_t aSize); + + private: + static uint64_t MutationProbability(); + static uint64_t MutationFactor(); + static bool IsEnabled(); + static bool IsLoggingEnabled(); +}; + +} // namespace ipc +} // namespace mozilla + +#endif diff --git a/tools/fuzzing/shmem/moz.build b/tools/fuzzing/shmem/moz.build new file mode 100644 index 0000000000..ee9c549920 --- /dev/null +++ b/tools/fuzzing/shmem/moz.build @@ -0,0 +1,11 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += ["SharedMemoryFuzzer.cpp"] + +EXPORTS.mozilla.ipc += ["SharedMemoryFuzzer.h"] + +FINAL_LIBRARY = "xul" diff --git a/tools/fuzzing/smoke/grizzly_requirements.in b/tools/fuzzing/smoke/grizzly_requirements.in new file mode 100644 index 0000000000..96d49442a0 --- /dev/null +++ b/tools/fuzzing/smoke/grizzly_requirements.in @@ -0,0 +1,2 @@ +grizzly-framework==0.16.2 +-r ../../../build/psutil_requirements.txt diff --git a/tools/fuzzing/smoke/grizzly_requirements.txt b/tools/fuzzing/smoke/grizzly_requirements.txt new file mode 100644 index 0000000000..a6687e5426 --- /dev/null +++ b/tools/fuzzing/smoke/grizzly_requirements.txt @@ -0,0 +1,127 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --generate-hashes --output-file=tools/fuzzing/smoke/grizzly_requirements.txt tools/fuzzing/smoke/grizzly_requirements.in +# +certifi==2021.10.8 \ + --hash=sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872 \ + --hash=sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569 + # via requests +charset-normalizer==2.0.8 \ + --hash=sha256:735e240d9a8506778cd7a453d97e817e536bb1fc29f4f6961ce297b9c7a917b0 \ + --hash=sha256:83fcdeb225499d6344c8f7f34684c2981270beacc32ede2e669e94f7fa544405 + # via requests +cssbeautifier==1.14.0 \ + --hash=sha256:20be1f47f20762db32c78124ff44d351ba13894fa8e7cfe34014b672f9f6ecb2 + # via grizzly-framework +editorconfig==0.12.3 \ + --hash=sha256:6b0851425aa875b08b16789ee0eeadbd4ab59666e9ebe728e526314c4a2e52c1 + # via + # cssbeautifier + # jsbeautifier +fasteners==0.16.3 \ + --hash=sha256:8408e52656455977053871990bd25824d85803b9417aa348f10ba29ef0c751f7 + # via + # fuzzmanager + # grizzly-framework +ffpuppet==0.9.2 \ + --hash=sha256:340ec47fddc274c97e0b9d9e56d46885e96a07a45a265fd2fa5b21af7f655947 \ + --hash=sha256:41e93d2f3d8d3230822fd4962a85e66246a70927528dce76d765e0ffd859bf69 + # via grizzly-framework +fuzzmanager==0.4.1 \ + --hash=sha256:2bb17b5a725d8d6f03eb9979a75416a1137b7456c95a581a50c7b542fbf3d174 + # via grizzly-framework +grizzly-framework==0.16.2 \ + --hash=sha256:3270fd705a933c4d197784c6e403389c3edb791d09ce26b8a7d7c98934bbc87b \ + --hash=sha256:b01503d1a3a0f8a3fb65cfbc2397a1b1bd4d524389a24ce8eaf00613d374a91b + # via -r tools/fuzzing/smoke/grizzly_requirements.in +idna==3.3 \ + --hash=sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff \ + --hash=sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d + # via requests +jsbeautifier==1.14.0 \ + --hash=sha256:84fdb008d8af89619269a6aca702288b48f837a99427a0f529aa57ecfb36ee3c + # via + # cssbeautifier + # grizzly-framework +lithium-reducer==0.6.1 \ + --hash=sha256:ea2f77f496fc57bcb4d74209210c2ec84b1b327a7b707f98655f85575b6fcc16 + # via grizzly-framework +prefpicker==1.1.2 \ + --hash=sha256:1404cb0e7c07acca060a09fcc3d9203ae2d8784741b6fe97600a707b9b3ff75e \ + --hash=sha256:ea25b33e92a342a0bf2c38f5588dd4baf9d381f70faf7ae2145ca5cad49a2644 + # via grizzly-framework +psutil==5.9.4 \ + --hash=sha256:149555f59a69b33f056ba1c4eb22bb7bf24332ce631c44a319cec09f876aaeff \ + --hash=sha256:16653106f3b59386ffe10e0bad3bb6299e169d5327d3f187614b1cb8f24cf2e1 \ + --hash=sha256:3d7f9739eb435d4b1338944abe23f49584bde5395f27487d2ee25ad9a8774a62 \ + --hash=sha256:3ff89f9b835100a825b14c2808a106b6fdcc4b15483141482a12c725e7f78549 \ + --hash=sha256:54c0d3d8e0078b7666984e11b12b88af2db11d11249a8ac8920dd5ef68a66e08 \ + --hash=sha256:54d5b184728298f2ca8567bf83c422b706200bcbbfafdc06718264f9393cfeb7 \ + --hash=sha256:6001c809253a29599bc0dfd5179d9f8a5779f9dffea1da0f13c53ee568115e1e \ + --hash=sha256:68908971daf802203f3d37e78d3f8831b6d1014864d7a85937941bb35f09aefe \ + --hash=sha256:6b92c532979bafc2df23ddc785ed116fced1f492ad90a6830cf24f4d1ea27d24 \ + --hash=sha256:852dd5d9f8a47169fe62fd4a971aa07859476c2ba22c2254d4a1baa4e10b95ad \ + --hash=sha256:9120cd39dca5c5e1c54b59a41d205023d436799b1c8c4d3ff71af18535728e94 \ + --hash=sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8 \ + --hash=sha256:efeae04f9516907be44904cc7ce08defb6b665128992a56957abc9b61dca94b7 \ + --hash=sha256:fd8522436a6ada7b4aad6638662966de0d61d241cb821239b2ae7013d41a43d4 + # via + # -r tools/fuzzing/smoke/../../../build/psutil_requirements.txt + # ffpuppet + # grizzly-framework +pyyaml==6.0 \ + --hash=sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293 \ + --hash=sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b \ + --hash=sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57 \ + --hash=sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b \ + --hash=sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4 \ + --hash=sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07 \ + --hash=sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba \ + --hash=sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9 \ + --hash=sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287 \ + --hash=sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513 \ + --hash=sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0 \ + --hash=sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0 \ + --hash=sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92 \ + --hash=sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f \ + --hash=sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2 \ + --hash=sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc \ + --hash=sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c \ + --hash=sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86 \ + --hash=sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4 \ + --hash=sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c \ + --hash=sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34 \ + --hash=sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b \ + --hash=sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c \ + --hash=sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb \ + --hash=sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737 \ + --hash=sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3 \ + --hash=sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d \ + --hash=sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53 \ + --hash=sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78 \ + --hash=sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803 \ + --hash=sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a \ + --hash=sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174 \ + --hash=sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5 + # via prefpicker +requests==2.26.0 \ + --hash=sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24 \ + --hash=sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7 + # via fuzzmanager +six==1.16.0 \ + --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ + --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 + # via + # cssbeautifier + # fasteners + # fuzzmanager + # jsbeautifier +urllib3==1.26.7 \ + --hash=sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece \ + --hash=sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844 + # via requests +xvfbwrapper==0.2.9 \ + --hash=sha256:bcf4ae571941b40254faf7a73432dfc119ad21ce688f1fdec533067037ecfc24 + # via ffpuppet diff --git a/tools/fuzzing/smoke/js.py b/tools/fuzzing/smoke/js.py new file mode 100755 index 0000000000..d6ad08eb6a --- /dev/null +++ b/tools/fuzzing/smoke/js.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +""" Hello I am a fake jsshell for testing purpose. +Add more features! +""" +import argparse +import sys + + +def run(): + parser = argparse.ArgumentParser(description="Process some integers.") + parser.add_argument("-e", type=str, default=None) + + parser.add_argument("--fuzzing-safe", action="store_true", default=False) + + args = parser.parse_args() + + if args.e is not None: + if "crash()" in args.e: + sys.exit(1) + + +if __name__ == "__main__": + run() diff --git a/tools/fuzzing/smoke/python.ini b/tools/fuzzing/smoke/python.ini new file mode 100644 index 0000000000..6701224f36 --- /dev/null +++ b/tools/fuzzing/smoke/python.ini @@ -0,0 +1,5 @@ +[DEFAULT] +subsuite = fuzzing + +[test_grizzly.py] +requirements = tools/fuzzing/smoke/grizzly_requirements.txt diff --git a/tools/fuzzing/smoke/smoke.py b/tools/fuzzing/smoke/smoke.py new file mode 100644 index 0000000000..e328be71d6 --- /dev/null +++ b/tools/fuzzing/smoke/smoke.py @@ -0,0 +1,71 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +""" Smoke test script for Fuzzing + +This script can be used to perform simple calls using `jsshell` +or whatever other tools you may add. + +The call is done via `taskcluster/ci/fuzzing/kind.yml` and +files contained in the `target.jsshell.zip` and `target.fuzztest.tests.tar.gz` +build artifacts are downloaded to run things. + +Everything included in this directory will be added in +`target.fuzztest.tests.tar.gz` at build time, so you can add more scripts and +tools if you need. They will be located in `$MOZ_FETCHES_DIR` and follow the +same directory structure than the source tree. +""" +import os +import os.path +import shlex +import subprocess +import sys +from distutils.spawn import find_executable + + +def run_jsshell(command, label=None): + """Invokes `jsshell` with command. + + This function will use the `JSSHELL` environment variable, + and fallback to a `js` executable if it finds one + """ + shell = os.environ.get("JSSHELL") + if shell is None: + shell = find_executable("js") + if shell is None: + raise FileNotFoundError(shell) + else: + if not os.path.exists(shell) or not os.path.isfile(shell): + raise FileNotFoundError(shell) + + if label is None: + label = command + sys.stdout.write(label) + cmd = [shell] + shlex.split(command) + sys.stdout.flush() + try: + subprocess.check_call(cmd) + finally: + sys.stdout.write("\n") + sys.stdout.flush() + + +def smoke_test(): + # first, let's make sure it catches crashes so we don't have false + # positives. + try: + run_jsshell("-e 'crash();'", "Testing for crash\n") + except subprocess.CalledProcessError: + pass + else: + raise Exception("Could not get the process to crash") + + # now let's proceed with some tests + run_jsshell("--fuzzing-safe -e 'print(\"PASSED\")'", "Simple Fuzzing...") + + # add more smoke tests here + + +if __name__ == "__main__": + # if this calls raises an error, the job will turn red in the CI. + smoke_test() diff --git a/tools/fuzzing/smoke/test_grizzly.py b/tools/fuzzing/smoke/test_grizzly.py new file mode 100644 index 0000000000..2c8f406222 --- /dev/null +++ b/tools/fuzzing/smoke/test_grizzly.py @@ -0,0 +1,42 @@ +import os +import os.path +import sys +from subprocess import check_call + +import mozunit +import pytest +from moztest.selftest import fixtures + +MOZ_AUTOMATION = bool(os.getenv("MOZ_AUTOMATION", "0") == "1") + + +def test_grizzly_smoke(): + ffbin = fixtures.binary() + + if MOZ_AUTOMATION: + assert os.path.exists( + ffbin + ), "Missing Firefox build. Build it, or set GECKO_BINARY_PATH" + + elif not os.path.exists(ffbin): + pytest.skip("Missing Firefox build. Build it, or set GECKO_BINARY_PATH") + + check_call( + [ + sys.executable, + "-m", + "grizzly", + ffbin, + "no-op", + "--headless", + "--smoke-test", + "--limit", + "10", + "--relaunch", + "5", + ], + ) + + +if __name__ == "__main__": + mozunit.main() diff --git a/tools/fuzzing/smoke/tests.py b/tools/fuzzing/smoke/tests.py new file mode 100644 index 0000000000..bc06e2427b --- /dev/null +++ b/tools/fuzzing/smoke/tests.py @@ -0,0 +1,34 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import os +from contextlib import contextmanager + +import pytest +import smoke + +JS = os.path.join(os.path.dirname(__file__), "js.py") + + +@contextmanager +def fake_js(): + os.environ["JSSHELL"] = JS + try: + yield + finally: + del os.environ["JSSHELL"] + + +def test_run_no_jsshell(): + with pytest.raises(FileNotFoundError): + smoke.run_jsshell("--fuzzing-safe -e 'print(\"PASSED\")'") + + +def test_run_jsshell_set(): + with fake_js(): + smoke.run_jsshell("--fuzzing-safe -e 'print(\"PASSED\")'") + + +def test_smoke_test(): + with fake_js(): + smoke.smoke_test() |