1 files changed, 87 insertions, 0 deletions
diff --git a/tools/fuzzing/libfuzzer/FuzzerMerge.h b/tools/fuzzing/libfuzzer/FuzzerMerge.h
new file mode 100644
index 0000000000..6dc1c4c45a
--- /dev/null
+++ b/tools/fuzzing/libfuzzer/FuzzerMerge.h
@@ -0,0 +1,87 @@
+//===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Merging Corpora.
+//
+// The task:
+//   Take the existing corpus (possibly empty) and merge new inputs into
+//   it so that only inputs with new coverage ('features') are added.
+//   The process should tolerate the crashes, OOMs, leaks, etc.
+//
+// Algorithm:
+//   The outer process collects the set of files and writes their names
+//   into a temporary "control" file, then repeatedly launches the inner
+//   process until all inputs are processed.
+//   The outer process does not actually execute the target code.
+//
+//   The inner process reads the control file and sees a) list of all the inputs
+//   and b) the last processed input. Then it starts processing the inputs one
+//   by one. Before processing every input it writes one line to control file:
+//   STARTED INPUT_ID INPUT_SIZE
+//   After processing an input it writes the following lines:
+//   FT INPUT_ID Feature1 Feature2 Feature3 ...
+//   COV INPUT_ID Coverage1 Coverage2 Coverage3 ...
+//   If a crash happens while processing an input the last line in the control
+//   file will be "STARTED INPUT_ID" and so the next process will know
+//   where to resume.
+//
+//   Once all inputs are processed by the inner process(es) the outer process
+//   reads the control files and does the merge based entirely on the contents
+//   of control file.
+//   It uses a single pass greedy algorithm choosing first the smallest inputs
+//   within the same size the inputs that have more new features.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_MERGE_H
+#define LLVM_FUZZER_MERGE_H
+
+#include "FuzzerDefs.h"
+
+#include <istream>
+#include <ostream>
+#include <set>
+#include <vector>
+
+namespace fuzzer {
+
+struct MergeFileInfo {
+  std::string Name;
+  size_t Size = 0;
+  Vector<uint32_t> Features, Cov;
+};
+
+struct Merger {
+  Vector<MergeFileInfo> Files;
+  size_t NumFilesInFirstCorpus = 0;
+  size_t FirstNotProcessedFile = 0;
+  std::string LastFailure;
+
+  bool Parse(std::istream &IS, bool ParseCoverage);
+  bool Parse(const std::string &Str, bool ParseCoverage);
+  int ParseOrExit(std::istream &IS, bool ParseCoverage);
+  size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures,
+               const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
+               Vector<std::string> *NewFiles);
+  size_t ApproximateMemoryConsumption() const;
+  Set<uint32_t> AllFeatures() const;
+};
+
+int CrashResistantMerge(const Vector<std::string> &Args,
+                         const Vector<SizedFile> &OldCorpus,
+                         const Vector<SizedFile> &NewCorpus,
+                         Vector<std::string> *NewFiles,
+                         const Set<uint32_t> &InitialFeatures,
+                         Set<uint32_t> *NewFeatures,
+                         const Set<uint32_t> &InitialCov,
+                         Set<uint32_t> *NewCov,
+                         const std::string &CFPath,
+                         bool Verbose);
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_MERGE_H