summaryrefslogtreecommitdiffstats
path: root/sql/sql_acl_getsort.ic
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_acl_getsort.ic')
-rw-r--r--sql/sql_acl_getsort.ic205
1 files changed, 205 insertions, 0 deletions
diff --git a/sql/sql_acl_getsort.ic b/sql/sql_acl_getsort.ic
new file mode 100644
index 00000000..df55c7c5
--- /dev/null
+++ b/sql/sql_acl_getsort.ic
@@ -0,0 +1,205 @@
+/* Copyright (c) 2019, MariaDB Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+/*
+ Returns a number which, if sorted in descending order, magically puts
+ patterns in the order from most specific (e.g. no wildcards) to most generic
+ (e.g. "%"). That is, the larger the number, the more specific the pattern is.
+
+ Takes a template that lists types of following patterns (by the first letter
+ of _h_ostname, _d_bname, _u_sername) and up to four patterns.
+ No more than two can be of 'h' or 'd' type (because one magic value takes 26
+ bits, see below).
+
+ ========================================================================
+
+ Here's how the magic is created:
+
+ Let's look at one iteration of the for() loop. That's one pattern. With
+ wildcards (usernames aren't interesting).
+
+ By definition a pattern A is "more specific" than pattern B if the set of
+ strings that match the pattern A is smaller than the set of strings that
+ match the pattern B. Strings are taken from the big superset of all valid
+ utf8 strings up to the maxlen.
+
+ Strings are matched character by character. For every non-wildcard
+ character there can be only one matching character in the matched string.
+
+ For a wild_one character ('_') any valid utf8 character will do. Below
+ numchars would mean a total number of vaid utf8 characters. It's a huge
+ number. A number of matching strings for wild_one will be numchars.
+
+ For a wild_many character ('%') any number of valid utf8 characters will do.
+ How many string will match it depends on the amount of non-wild_many
+ characters. Say, if a number of non-wildcard characters is N, and a number
+ of wild_one characters is M, and the number of wild_many characters is K,
+ then for K=1 its wild_many character will match any number of valid utf8
+ characters from 0 to L=maxlen-N-M. The number of matching strings will be
+
+ 1 + numchars + numchars^2 + numchars^3 + ... + numchars^L
+
+ Intermediate result: if M=K=0, the pattern will match only one string,
+ if M>0, K=0, the pattern will match numchars^M strings, if K=1, the
+ pattern will match
+
+ numchars^M + 1 + numchars + numchars^2 + ... + numchars^L
+
+ For a more visual notation, let's write these huge numbers not as
+ decimal or binary, but base numchars. Then the last number will be
+ a sum of two numbers: the first is one followed by M zeros, the second
+ constists of L+1 ones:
+
+ 1000{...M...}000 + 111{...L+1...}1111
+
+ This could produce any of the following
+
+ 111...112111...1111 if L > M, K = 1
+ 100...001111...1111 if M > L, K = 1
+ 2111111...111111111 if M = L, K = 1
+ 1111111...111111111 if M = 0, K = 1
+ 1000000...000000000 if K = 0, M > 0
+
+ There are two complications caused by multiple wild_many characters.
+ For, say, two wild_many characters, either can accept any number of utf8
+ characters, as long the the total amount of them is less then or equal to L.
+ Same logic applies to any number of non-consequent wild_many characters
+ (consequent wild_many characters count as one). This gives the number of
+ matching strings of
+
+ 1 + F(K,1)*numchars + F(K,2)*numchars^2 + ... + F(K,L)*numchars^L
+
+ where F(K,R) is the "number of ways one can put R balls into K boxes",
+ that is C^{K-1}_{R+K-1}.
+
+ In the "base numchars" notation, it means that besides 0, 1, and 2,
+ an R-th digit can be F(K,R). For the purpose of comparison, we only need
+ to know the most significant digit, F(K, L).
+ While it can be huge, we don't need the exact value, it's a
+ a monotonously increasing function of K, so if K1>K2, F(K1,L) > F(K2,L)
+ and we can simply compare values of K instead of complex F(K,L).
+
+ The second complication: F(K,R) gives only an upper boundary, the
+ actual number of matched strings can be smaller.
+ Example: pattern "a%b%c" can match "abbc" as a(b)b()c, and as a()b(b)c.
+ F(2,1) = 2, but it's only one string "abbc".
+ We'll ignore it here under assumption that it almost never happens
+ in practice and this simplification won't noticeably disrupt the ordering.
+
+ The last detail: old get_sort function sorted by the non-wildcard prefix
+ length, so in "abc_" and "a_bc" the former one was sorted first. Strictly
+ speaking they're both equally specific, but to preserve the backward
+ compatible sorting we'll use the P "prefix length or 0 if no wildcards"
+ to break ties.
+
+ Now, let's compare two long numbers. Numbers are easy to compare,
+ the longer number is larger. If they both have the same lengths,
+ the one with the larger first digit is larger, and so on.
+
+ But there is no need to actually calculate these numbers.
+ Three numbers L, K, M (and P to break ties) are enough to describe a pattern
+ for a purpose of comparison. L/K/M triplets can be compared like this:
+
+ * case 1: if for both patterns L>M: compare L, K, M, in that order
+ because:
+ - if L1 > L2, the first number is longer
+ - If L1 == L2, then the first digit is a monotonously increasing function
+ of K, so the first digit is larger when K is larger
+ - if K1 == K2, then all other digits in these numbers would be the
+ same too, with the exception of one digit in the middle that
+ got +1 because of +1000{...M...}000. So, whatever number has a
+ larger M will get this +1 first.
+ * case 2: if for both patterns L<M: compare M, L, K, in that order
+ * case 3: if for both patterns L=M: compare L (or M), K
+ * case 4: if one L1>M1, other L2=M2: compare L, K, M
+ * case 5: if one L1<M1, other L2=M2: compare M, L, K
+ * case 6: if one pattern L1>M1, the other M2>L2: first is more generic
+ unless (case 6a) K1=K2=1,M1=0,M2=L2+1 (in that case - equal)
+
+ note that in case 3 one can use a rule from the case either 1 or 2,
+ in the case 4 one can use the rule from the case 1,
+ in the case 5 one can use the rule from the case 2.
+
+ for the case 6 and ignoring the special case 6a, to compare patterns by a
+ magic number as a function z(a,b,c), we must ensure that z(L1,K1,M1) is
+ greater than z(M2,L2,K2) when L1=M2. This can be done by an extra bit,
+ which is 1 for K and 0 for L. Thus, the magic number could be
+
+ case 1: (((L*2 + 1)*(maxlen+1) + K)*(maxlen+1) + M)*(maxlen+1) + P
+ case 2: ((M*2*(maxlen+1) + L)*(maxlen+1) + K)*(maxlen+1) + P
+
+ upper bound: L<=maxlen, M<=maxlen, K<=maxlen/2, P<maxlen
+ for a current maxlen=64, the magic number needs 26 bits.
+*/
+
+static ulonglong get_magic_sort(const char *templ, ...)
+{
+ ulonglong sort=0;
+ va_list args;
+ va_start(args, templ);
+
+ IF_DBUG(uint bits_used= 0,);
+
+ for (; *templ; templ++)
+ {
+ char *pat= va_arg(args, char*);
+
+ if (*templ == 'u')
+ {
+ /* Username. Can be empty (= anybody) or a literal. Encoded in one bit */
+ sort= (sort << 1) + !*pat;
+ IF_DBUG(bits_used++,);
+ continue;
+ }
+
+ /* A wildcard pattern. Encoded in 26 bits. */
+ uint maxlen= *templ == 'd' ? max_dbname_length : max_hostname_length;
+ DBUG_ASSERT(maxlen <= 64);
+ DBUG_ASSERT(*templ == 'd' || *templ == 'h');
+
+ uint N= 0, M= 0, K= 0, P= 0;
+ for (uint i=0; pat[i]; i++)
+ {
+ if (pat[i] == wild_many)
+ {
+ if (!K && !M) P= N;
+ K++;
+ while (pat[i+1] == wild_many) i++;
+ continue;
+ }
+ if (pat[i] == wild_one)
+ {
+ if (!K && !M) P= N;
+ M++;
+ continue;
+ }
+ if (pat[i] == wild_prefix && pat[i+1]) i++;
+ N++;
+ }
+ uint L= K ? maxlen - N - M : 0, d= maxlen + 1, magic;
+ if (L > M)
+ magic= (((L * 2 + 1) * d + K) * d + M) * d + P;
+ else
+ magic= (((M * 2 + 0) * d + L) * d + K) * d + P;
+ DBUG_ASSERT(magic < 1<<26);
+ sort= (sort << 26) + magic;
+ IF_DBUG(bits_used+= 26,);
+ }
+ DBUG_ASSERT(bits_used < 8*sizeof(sort));
+ va_end(args);
+ return ~sort;
+}
+#endif