diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 20:01:36 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 20:01:36 +0000 |
commit | 62e4c68907d8d33709c2c1f92a161dff00b3d5f2 (patch) | |
tree | adbbaf3acf88ea08f6eeec4b75ee98ad3b07fbdc /src/fts_fuzzy_match.cc | |
parent | Initial commit. (diff) | |
download | lnav-62e4c68907d8d33709c2c1f92a161dff00b3d5f2.tar.xz lnav-62e4c68907d8d33709c2c1f92a161dff00b3d5f2.zip |
Adding upstream version 0.11.2.upstream/0.11.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/fts_fuzzy_match.cc | 227 |
1 files changed, 227 insertions, 0 deletions
diff --git a/src/fts_fuzzy_match.cc b/src/fts_fuzzy_match.cc new file mode 100644 index 0000000..8ae7098 --- /dev/null +++ b/src/fts_fuzzy_match.cc @@ -0,0 +1,227 @@ +// LICENSE +// +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. + +#include <cstring> // memcpy + +#include "fts_fuzzy_match.hh" + +#include <ctype.h> // ::tolower, ::toupper + +#include "config.h" + +namespace fts { + +// Forward declarations for "private" implementation +namespace fuzzy_internal { +static bool fuzzy_match_recursive(const char* pattern, + const char* str, + int& outScore, + const char* strBegin, + uint8_t const* srcMatches, + uint8_t* newMatches, + int maxMatches, + int nextMatch, + int& recursionCount, + int recursionLimit); +} + +// Public interface +bool +fuzzy_match_simple(char const* pattern, char const* str) +{ + while (*pattern != '\0' && *str != '\0') { + if (tolower(*pattern) == tolower(*str)) + ++pattern; + ++str; + } + + return *pattern == '\0' ? true : false; +} + +bool +fuzzy_match(char const* pattern, char const* str, int& outScore) +{ + uint8_t matches[256]; + return fuzzy_match(pattern, str, outScore, matches, sizeof(matches)); +} + +bool +fuzzy_match(char const* pattern, + char const* str, + int& outScore, + uint8_t* matches, + int maxMatches) +{ + int recursionCount = 0; + int recursionLimit = 10; + + return fuzzy_internal::fuzzy_match_recursive(pattern, + str, + outScore, + str, + nullptr, + matches, + maxMatches, + 0, + recursionCount, + recursionLimit); +} + +// Private implementation +static bool +fuzzy_internal::fuzzy_match_recursive(const char* pattern, + const char* str, + int& outScore, + const char* strBegin, + uint8_t const* srcMatches, + uint8_t* matches, + int maxMatches, + int nextMatch, + int& recursionCount, + int recursionLimit) +{ + // Count recursions + ++recursionCount; + if (recursionCount >= recursionLimit) + return false; + + // Detect end of strings + if (*pattern == '\0' || *str == '\0') + return false; + + // Recursion params + bool recursiveMatch = false; + uint8_t bestRecursiveMatches[256]; + int bestRecursiveScore = 0; + + // Loop through pattern and str looking for a match + bool first_match = true; + while (*pattern != '\0' && *str != '\0') { + // Found match + if (tolower(*pattern) == tolower(*str)) { + // Supplied matches buffer was too short + if (nextMatch >= maxMatches) + return false; + + // "Copy-on-Write" srcMatches into matches + if (first_match && srcMatches) { + memcpy(matches, srcMatches, nextMatch); + first_match = false; + } + + // Recursive call that "skips" this match + uint8_t recursiveMatches[256]; + int recursiveScore; + if (fuzzy_match_recursive(pattern, + str + 1, + recursiveScore, + strBegin, + matches, + recursiveMatches, + sizeof(recursiveMatches), + nextMatch, + recursionCount, + recursionLimit)) + { + // Pick best recursive score + if (!recursiveMatch || recursiveScore > bestRecursiveScore) { + memcpy(bestRecursiveMatches, recursiveMatches, 256); + bestRecursiveScore = recursiveScore; + } + recursiveMatch = true; + } + + // Advance + matches[nextMatch++] = (uint8_t) (str - strBegin); + ++pattern; + } + ++str; + } + + // Determine if full pattern was matched + bool matched = *pattern == '\0' ? true : false; + + // Calculate score + if (matched) { + const int sequential_bonus = 15; // bonus for adjacent matches + const int separator_bonus + = 30; // bonus if match occurs after a separator + const int camel_bonus + = 30; // bonus if match is uppercase and prev is lower + const int first_letter_bonus + = 15; // bonus if the first letter is matched + + const int leading_letter_penalty + = -5; // penalty applied for every letter in str before the first + // match + const int max_leading_letter_penalty + = -15; // maximum penalty for leading letters + const int unmatched_letter_penalty + = -1; // penalty for every letter that doesn't matter + + // Iterate str to end + while (*str != '\0') + ++str; + + // Initialize score + outScore = 100; + + // Apply leading letter penalty + int penalty = leading_letter_penalty * matches[0]; + if (penalty < max_leading_letter_penalty) + penalty = max_leading_letter_penalty; + outScore += penalty; + + // Apply unmatched penalty + int unmatched = (int) (str - strBegin) - nextMatch; + outScore += unmatched_letter_penalty * unmatched; + + // Apply ordering bonuses + for (int i = 0; i < nextMatch; ++i) { + uint8_t currIdx = matches[i]; + + if (i > 0) { + uint8_t prevIdx = matches[i - 1]; + + // Sequential + if (currIdx == (prevIdx + 1)) + outScore += sequential_bonus; + } + + // Check for bonuses based on neighbor character value + if (currIdx > 0) { + // Camel case + char neighbor = strBegin[currIdx - 1]; + char curr = strBegin[currIdx]; + if (::islower(neighbor) && ::isupper(curr)) + outScore += camel_bonus; + + // Separator + bool neighborSeparator = neighbor == '_' || neighbor == ' '; + if (neighborSeparator) + outScore += separator_bonus; + } else { + // First letter + outScore += first_letter_bonus; + } + } + } + + // Return best result + if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) { + // Recursive score is better than "this" + memcpy(matches, bestRecursiveMatches, maxMatches); + outScore = bestRecursiveScore; + return true; + } else if (matched) { + // "this" score is better than recursive + return true; + } else { + // no match + return false; + } +} +} // namespace fts |