summaryrefslogtreecommitdiffstats
path: root/sql/sql_acl_getsort.ic
blob: df55c7c5f1e568d3b73e50acdc7c5e316af0b419 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/* Copyright (c) 2019, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */

#ifndef NO_EMBEDDED_ACCESS_CHECKS
/*
  Returns a number which, if sorted in descending order, magically puts
  patterns in the order from most specific (e.g. no wildcards) to most generic
  (e.g. "%"). That is, the larger the number, the more specific the pattern is.

  Takes a template that lists types of following patterns (by the first letter
  of _h_ostname, _d_bname, _u_sername) and up to four patterns.
  No more than two can be of 'h' or 'd' type (because one magic value takes 26
  bits, see below).

  ========================================================================

  Here's how the magic is created:

  Let's look at one iteration of the for() loop. That's one pattern.  With
  wildcards (usernames aren't interesting).

  By definition a pattern A is "more specific" than pattern B if the set of
  strings that match the pattern A is smaller than the set of strings that
  match the pattern B. Strings are taken from the big superset of all valid
  utf8 strings up to the maxlen.

  Strings are matched character by character. For every non-wildcard
  character there can be only one matching character in the matched string.

  For a wild_one character ('_') any valid utf8 character will do. Below
  numchars would mean a total number of vaid utf8 characters. It's a huge
  number. A number of matching strings for wild_one will be numchars.

  For a wild_many character ('%') any number of valid utf8 characters will do.
  How many string will match it depends on the amount of non-wild_many
  characters.  Say, if a number of non-wildcard characters is N, and a number
  of wild_one characters is M, and the number of wild_many characters is K,
  then for K=1 its wild_many character will match any number of valid utf8
  characters from 0 to L=maxlen-N-M. The number of matching strings will be

     1 + numchars + numchars^2 + numchars^3 + ... + numchars^L

  Intermediate result: if M=K=0, the pattern will match only one string,
  if M>0, K=0, the pattern will match numchars^M strings, if K=1, the
  pattern will match

     numchars^M + 1 + numchars + numchars^2 + ... + numchars^L

  For a more visual notation, let's write these huge numbers not as
  decimal or binary, but base numchars. Then the last number will be
  a sum of two numbers: the first is one followed by M zeros, the second
  constists of L+1 ones:

    1000{...M...}000 + 111{...L+1...}1111

  This could produce any of the following

    111...112111...1111       if L > M, K = 1
    100...001111...1111       if M > L, K = 1
    2111111...111111111       if M = L, K = 1
    1111111...111111111       if M = 0, K = 1
    1000000...000000000       if K = 0, M > 0

  There are two complications caused by multiple wild_many characters.
  For, say, two wild_many characters, either can accept any number of utf8
  characters, as long the the total amount of them is less then or equal to L.
  Same logic applies to any number of non-consequent wild_many characters
  (consequent wild_many characters count as one). This gives the number of
  matching strings of

    1 + F(K,1)*numchars + F(K,2)*numchars^2 + ... + F(K,L)*numchars^L

  where F(K,R) is the "number of ways one can put R balls into K boxes",
  that is C^{K-1}_{R+K-1}.

  In the "base numchars" notation, it means that besides 0, 1, and 2,
  an R-th digit can be F(K,R). For the purpose of comparison, we only need
  to know the most significant digit, F(K, L).
  While it can be huge, we don't need the exact value, it's a
  a monotonously increasing function of K, so if K1>K2, F(K1,L) > F(K2,L)
  and we can simply compare values of K instead of complex F(K,L).

  The second complication: F(K,R) gives only an upper boundary, the
  actual number of matched strings can be smaller.
  Example: pattern "a%b%c" can match "abbc" as a(b)b()c, and as a()b(b)c.
  F(2,1) = 2, but it's only one string "abbc".
  We'll ignore it here under assumption that it almost never happens
  in practice and this simplification won't noticeably disrupt the ordering.

  The last detail: old get_sort function sorted by the non-wildcard prefix
  length, so in "abc_" and "a_bc" the former one was sorted first. Strictly
  speaking they're both equally specific, but to preserve the backward
  compatible sorting we'll use the P "prefix length or 0 if no wildcards"
  to break ties.

  Now, let's compare two long numbers. Numbers are easy to compare,
  the longer number is larger. If they both have the same lengths,
  the one with the larger first digit is larger, and so on.

  But there is no need to actually calculate these numbers.
  Three numbers L, K, M (and P to break ties) are enough to describe a pattern
  for a purpose of comparison. L/K/M triplets can be compared like this:

  * case 1: if for both patterns L>M: compare L, K, M, in that order
    because:
      - if L1 > L2, the first number is longer
      - If L1 == L2, then the first digit is a monotonously increasing function
        of K, so the first digit is larger when K is larger
      - if K1 == K2, then all other digits in these numbers would be the
        same too, with the exception of one digit in the middle that
        got +1 because of +1000{...M...}000. So, whatever number has a
        larger M will get this +1 first.
  * case 2: if for both patterns L<M: compare M, L, K, in that order
  * case 3: if for both patterns L=M: compare L (or M), K
  * case 4: if one L1>M1, other L2=M2: compare L, K, M
  * case 5: if one L1<M1, other L2=M2: compare M, L, K
  * case 6: if one pattern L1>M1, the other M2>L2: first is more generic
     unless (case 6a) K1=K2=1,M1=0,M2=L2+1 (in that case - equal)

  note that in case 3 one can use a rule from the case either 1 or 2,
  in the case 4 one can use the rule from the case 1,
  in the case 5 one can use the rule from the case 2.

  for the case 6 and ignoring the special case 6a, to compare patterns by a
  magic number as a function z(a,b,c), we must ensure that z(L1,K1,M1) is
  greater than z(M2,L2,K2) when L1=M2. This can be done by an extra bit,
  which is 1 for K and 0 for L. Thus, the magic number could be

  case 1: (((L*2 + 1)*(maxlen+1) + K)*(maxlen+1) + M)*(maxlen+1) + P
  case 2: ((M*2*(maxlen+1) + L)*(maxlen+1) + K)*(maxlen+1) + P

  upper bound: L<=maxlen, M<=maxlen, K<=maxlen/2, P<maxlen
  for a current maxlen=64, the magic number needs 26 bits.
*/

static ulonglong get_magic_sort(const char *templ, ...)
{
  ulonglong sort=0;
  va_list args;
  va_start(args, templ);

  IF_DBUG(uint bits_used= 0,);

  for (; *templ; templ++)
  {
    char *pat= va_arg(args, char*);

    if (*templ == 'u')
    {
      /* Username. Can be empty (= anybody) or a literal. Encoded in one bit */
      sort= (sort << 1) + !*pat;
      IF_DBUG(bits_used++,);
      continue;
    }

    /* A wildcard pattern.  Encoded in 26 bits.  */
    uint maxlen= *templ == 'd' ? max_dbname_length : max_hostname_length;
    DBUG_ASSERT(maxlen <= 64);
    DBUG_ASSERT(*templ == 'd' || *templ == 'h');

    uint N= 0, M= 0, K= 0, P= 0;
    for (uint i=0; pat[i]; i++)
    {
      if (pat[i] == wild_many)
      {
        if (!K && !M) P= N;
        K++;
        while (pat[i+1] == wild_many) i++;
        continue;
      }
      if (pat[i] == wild_one)
      {
        if (!K && !M) P= N;
        M++;
        continue;
      }
      if (pat[i] == wild_prefix && pat[i+1]) i++;
      N++;
    }
    uint L= K ? maxlen - N - M : 0, d= maxlen + 1, magic;
    if (L > M)
      magic= (((L * 2 + 1) * d + K) * d + M) * d + P;
    else
      magic= (((M * 2 + 0) * d + L) * d + K) * d + P;
    DBUG_ASSERT(magic < 1<<26);
    sort= (sort << 26) + magic;
    IF_DBUG(bits_used+= 26,);
  }
  DBUG_ASSERT(bits_used < 8*sizeof(sort));
  va_end(args);
  return ~sort;
}
#endif