diff options
Diffstat (limited to 'servers/slapd/back-wt/idl.c')
-rw-r--r-- | servers/slapd/back-wt/idl.c | 789 |
1 files changed, 789 insertions, 0 deletions
diff --git a/servers/slapd/back-wt/idl.c b/servers/slapd/back-wt/idl.c new file mode 100644 index 0000000..0f3167d --- /dev/null +++ b/servers/slapd/back-wt/idl.c @@ -0,0 +1,789 @@ +/* OpenLDAP WiredTiger backend */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software <http://www.openldap.org/>. + * + * Copyright 2002-2022 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ +/* ACKNOWLEDGEMENTS: + * This work was developed by HAMANO Tsukasa <hamano@osstech.co.jp> + * based on back-bdb for inclusion in OpenLDAP Software. + * WiredTiger is a product of MongoDB Inc. + */ + +#include "portable.h" + +#include <stdio.h> +#include <ac/string.h> + +#include "back-wt.h" +#include "idl.h" + +#define IDL_MAX(x,y) ( (x) > (y) ? (x) : (y) ) +#define IDL_MIN(x,y) ( (x) < (y) ? (x) : (y) ) +#define IDL_CMP(x,y) ( (x) < (y) ? -1 : (x) > (y) ) + +void wt_idl_check( ID *ids ) +{ + if( WT_IDL_IS_RANGE( ids ) ) { + assert( WT_IDL_RANGE_FIRST(ids) <= WT_IDL_RANGE_LAST(ids) ); + } else { + ID i; + for( i=1; i < ids[0]; i++ ) { + assert( ids[i+1] > ids[i] ); + } + } +} + +void wt_idl_dump( ID *ids ) +{ + if( WT_IDL_IS_RANGE( ids ) ) { + Debug( LDAP_DEBUG_ANY, + "IDL: range ( %ld - %ld )\n", + (long) WT_IDL_RANGE_FIRST( ids ), + (long) WT_IDL_RANGE_LAST( ids ) ); + + } else { + ID i; + Debug( LDAP_DEBUG_ANY, "IDL: size %ld", (long) ids[0] ); + + for( i=1; i<=ids[0]; i++ ) { + if( i % 16 == 1 ) { + Debug( LDAP_DEBUG_ANY, "\n" ); + } + Debug( LDAP_DEBUG_ANY, " %02lx", (long) ids[i] ); + } + + Debug( LDAP_DEBUG_ANY, "\n" ); + } + + wt_idl_check( ids ); +} + +unsigned wt_idl_search( ID *ids, ID id ) +{ +#define IDL_BINARY_SEARCH 1 +#ifdef IDL_BINARY_SEARCH + /* + * binary search of id in ids + * if found, returns position of id + * if not found, returns first position greater than id + */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = ids[0]; + +#if IDL_DEBUG > 0 + idl_check( ids ); +#endif + + while( 0 < n ) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = IDL_CMP( id, ids[cursor] ); + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor; + n -= pivot + 1; + + } else { + return cursor; + } + } + + if( val > 0 ) { + ++cursor; + } + return cursor; + +#else + /* (reverse) linear search */ + int i; + +#if IDL_DEBUG > 0 + idl_check( ids ); +#endif + + for( i=ids[0]; i; i-- ) { + if( id > ids[i] ) { + break; + } + } + + return i+1; +#endif +} + +int wt_idl_insert( ID *ids, ID id ) +{ + unsigned x; + +#if IDL_DEBUG > 1 + Debug( LDAP_DEBUG_ANY, "insert: %04lx at %d\n", (long) id, x ); + idl_dump( ids ); +#elif IDL_DEBUG > 0 + wt_idl_check( ids ); +#endif + + if (WT_IDL_IS_RANGE( ids )) { + /* if already in range, treat as a dup */ + if (id >= WT_IDL_RANGE_FIRST(ids) && id <= WT_IDL_RANGE_LAST(ids)) + return -1; + if (id < WT_IDL_RANGE_FIRST(ids)) + ids[1] = id; + else if (id > WT_IDL_RANGE_LAST(ids)) + ids[2] = id; + return 0; + } + + x = wt_idl_search( ids, id ); + assert( x > 0 ); + + if( x < 1 ) { + /* internal error */ + return -2; + } + + if ( x <= ids[0] && ids[x] == id ) { + /* duplicate */ + return -1; + } + + if ( ++ids[0] >= WT_IDL_DB_MAX ) { + if( id < ids[1] ) { + ids[1] = id; + ids[2] = ids[ids[0]-1]; + } else if ( ids[ids[0]-1] < id ) { + ids[2] = id; + } else { + ids[2] = ids[ids[0]-1]; + } + ids[0] = NOID; + + } else { + /* insert id */ + AC_MEMCPY( &ids[x+1], &ids[x], (ids[0]-x) * sizeof(ID) ); + ids[x] = id; + } + +#if IDL_DEBUG > 1 + wt_idl_dump( ids ); +#elif IDL_DEBUG > 0 + wt_idl_check( ids ); +#endif + + return 0; +} + +static int wt_idl_delete( ID *ids, ID id ) +{ + unsigned x; + +#if IDL_DEBUG > 1 + Debug( LDAP_DEBUG_ANY, "delete: %04lx at %d\n", (long) id, x ); + idl_dump( ids ); +#elif IDL_DEBUG > 0 + wt_idl_check( ids ); +#endif + + if (WT_IDL_IS_RANGE( ids )) { + /* If deleting a range boundary, adjust */ + if ( ids[1] == id ) + ids[1]++; + else if ( ids[2] == id ) + ids[2]--; + /* deleting from inside a range is a no-op */ + + /* If the range has collapsed, re-adjust */ + if ( ids[1] > ids[2] ) + ids[0] = 0; + else if ( ids[1] == ids[2] ) + ids[1] = 1; + return 0; + } + + x = wt_idl_search( ids, id ); + assert( x > 0 ); + + if( x <= 0 ) { + /* internal error */ + return -2; + } + + if( x > ids[0] || ids[x] != id ) { + /* not found */ + return -1; + + } else if ( --ids[0] == 0 ) { + if( x != 1 ) { + return -3; + } + + } else { + AC_MEMCPY( &ids[x], &ids[x+1], (1+ids[0]-x) * sizeof(ID) ); + } + +#if IDL_DEBUG > 1 + wt_idl_dump( ids ); +#elif IDL_DEBUG > 0 + wt_idl_check( ids ); +#endif + + return 0; +} + +static char * +wt_show_key( + char *buf, + void *val, + size_t len ) +{ + if ( len == 4 /* LUTIL_HASH_BYTES */ ) { + unsigned char *c = val; + sprintf( buf, "[%02x%02x%02x%02x]", c[0], c[1], c[2], c[3] ); + return buf; + } else { + return val; + } +} + +/* + * idl_intersection - return a = a intersection b + */ +int +wt_idl_intersection( + ID *a, + ID *b ) +{ + ID ida, idb; + ID idmax, idmin; + ID cursora = 0, cursorb = 0, cursorc; + int swap = 0; + + if ( WT_IDL_IS_ZERO( a ) || WT_IDL_IS_ZERO( b ) ) { + a[0] = 0; + return 0; + } + + idmin = IDL_MAX( WT_IDL_FIRST(a), WT_IDL_FIRST(b) ); + idmax = IDL_MIN( WT_IDL_LAST(a), WT_IDL_LAST(b) ); + if ( idmin > idmax ) { + a[0] = 0; + return 0; + } else if ( idmin == idmax ) { + a[0] = 1; + a[1] = idmin; + return 0; + } + + if ( WT_IDL_IS_RANGE( a ) ) { + if ( WT_IDL_IS_RANGE(b) ) { + /* If both are ranges, just shrink the boundaries */ + a[1] = idmin; + a[2] = idmax; + return 0; + } else { + /* Else swap so that b is the range, a is a list */ + ID *tmp = a; + a = b; + b = tmp; + swap = 1; + } + } + + /* If a range completely covers the list, the result is + * just the list. If idmin to idmax is contiguous, just + * turn it into a range. + */ + if ( WT_IDL_IS_RANGE( b ) + && WT_IDL_RANGE_FIRST( b ) <= WT_IDL_FIRST( a ) + && WT_IDL_RANGE_LAST( b ) >= WT_IDL_LLAST( a ) ) { + if (idmax - idmin + 1 == a[0]) + { + a[0] = NOID; + a[1] = idmin; + a[2] = idmax; + } + goto done; + } + + /* Fine, do the intersection one element at a time. + * First advance to idmin in both IDLs. + */ + cursora = cursorb = idmin; + ida = wt_idl_first( a, &cursora ); + idb = wt_idl_first( b, &cursorb ); + cursorc = 0; + + while( ida <= idmax || idb <= idmax ) { + if( ida == idb ) { + a[++cursorc] = ida; + ida = wt_idl_next( a, &cursora ); + idb = wt_idl_next( b, &cursorb ); + } else if ( ida < idb ) { + ida = wt_idl_next( a, &cursora ); + } else { + idb = wt_idl_next( b, &cursorb ); + } + } + a[0] = cursorc; +done: + if (swap) + WT_IDL_CPY( b, a ); + + return 0; +} + + +/* + * idl_union - return a = a union b + */ +int +wt_idl_union( + ID *a, + ID *b ) +{ + ID ida, idb; + ID cursora = 0, cursorb = 0, cursorc; + + if ( WT_IDL_IS_ZERO( b ) ) { + return 0; + } + + if ( WT_IDL_IS_ZERO( a ) ) { + WT_IDL_CPY( a, b ); + return 0; + } + + if ( WT_IDL_IS_RANGE( a ) || WT_IDL_IS_RANGE(b) ) { +over: ida = IDL_MIN( WT_IDL_FIRST(a), WT_IDL_FIRST(b) ); + idb = IDL_MAX( WT_IDL_LAST(a), WT_IDL_LAST(b) ); + a[0] = NOID; + a[1] = ida; + a[2] = idb; + return 0; + } + + ida = wt_idl_first( a, &cursora ); + idb = wt_idl_first( b, &cursorb ); + + cursorc = b[0]; + + /* The distinct elements of a are cat'd to b */ + while( ida != NOID || idb != NOID ) { + if ( ida < idb ) { + if( ++cursorc > WT_IDL_UM_MAX ) { + goto over; + } + b[cursorc] = ida; + ida = wt_idl_next( a, &cursora ); + + } else { + if ( ida == idb ) + ida = wt_idl_next( a, &cursora ); + idb = wt_idl_next( b, &cursorb ); + } + } + + /* b is copied back to a in sorted order */ + a[0] = cursorc; + cursora = 1; + cursorb = 1; + cursorc = b[0]+1; + while (cursorb <= b[0] || cursorc <= a[0]) { + if (cursorc > a[0]) + idb = NOID; + else + idb = b[cursorc]; + if (cursorb <= b[0] && b[cursorb] < idb) + a[cursora++] = b[cursorb++]; + else { + a[cursora++] = idb; + cursorc++; + } + } + + return 0; +} + + +#if 0 +/* + * wt_idl_notin - return a intersection ~b (or a minus b) + */ +int +wt_idl_notin( + ID *a, + ID *b, + ID *ids ) +{ + ID ida, idb; + ID cursora = 0, cursorb = 0; + + if( WT_IDL_IS_ZERO( a ) || + WT_IDL_IS_ZERO( b ) || + WT_IDL_IS_RANGE( b ) ) + { + WT_IDL_CPY( ids, a ); + return 0; + } + + if( WT_IDL_IS_RANGE( a ) ) { + WT_IDL_CPY( ids, a ); + return 0; + } + + ida = wt_idl_first( a, &cursora ), + idb = wt_idl_first( b, &cursorb ); + + ids[0] = 0; + + while( ida != NOID ) { + if ( idb == NOID ) { + /* we could shortcut this */ + ids[++ids[0]] = ida; + ida = wt_idl_next( a, &cursora ); + + } else if ( ida < idb ) { + ids[++ids[0]] = ida; + ida = wt_idl_next( a, &cursora ); + + } else if ( ida > idb ) { + idb = wt_idl_next( b, &cursorb ); + + } else { + ida = wt_idl_next( a, &cursora ); + idb = wt_idl_next( b, &cursorb ); + } + } + + return 0; +} +#endif + +ID wt_idl_first( ID *ids, ID *cursor ) +{ + ID pos; + + if ( ids[0] == 0 ) { + *cursor = NOID; + return NOID; + } + + if ( WT_IDL_IS_RANGE( ids ) ) { + if( *cursor < ids[1] ) { + *cursor = ids[1]; + } + return *cursor; + } + + if ( *cursor == 0 ) + pos = 1; + else + pos = wt_idl_search( ids, *cursor ); + + if( pos > ids[0] ) { + return NOID; + } + + *cursor = pos; + return ids[pos]; +} + +ID wt_idl_next( ID *ids, ID *cursor ) +{ + if ( WT_IDL_IS_RANGE( ids ) ) { + if( ids[2] < ++(*cursor) ) { + return NOID; + } + return *cursor; + } + + if ( ++(*cursor) <= ids[0] ) { + return ids[*cursor]; + } + + return NOID; +} + +/* Add one ID to an unsorted list. We ensure that the first element is the + * minimum and the last element is the maximum, for fast range compaction. + * this means IDLs up to length 3 are always sorted... + */ +int wt_idl_append_one( ID *ids, ID id ) +{ + if (WT_IDL_IS_RANGE( ids )) { + /* if already in range, treat as a dup */ + if (id >= WT_IDL_RANGE_FIRST(ids) && id <= WT_IDL_RANGE_LAST(ids)) + return -1; + if (id < WT_IDL_RANGE_FIRST(ids)) + ids[1] = id; + else if (id > WT_IDL_RANGE_LAST(ids)) + ids[2] = id; + return 0; + } + if ( ids[0] ) { + ID tmp; + + if (id < ids[1]) { + tmp = ids[1]; + ids[1] = id; + id = tmp; + } + if ( ids[0] > 1 && id < ids[ids[0]] ) { + tmp = ids[ids[0]]; + ids[ids[0]] = id; + id = tmp; + } + } + ids[0]++; + if ( ids[0] >= WT_IDL_UM_MAX ) { + ids[0] = NOID; + ids[2] = id; + } else { + ids[ids[0]] = id; + } + return 0; +} + +/* Append sorted list b to sorted list a. The result is unsorted but + * a[1] is the min of the result and a[a[0]] is the max. + */ +int wt_idl_append( ID *a, ID *b ) +{ + ID ida, idb, tmp, swap = 0; + + if ( WT_IDL_IS_ZERO( b ) ) { + return 0; + } + + if ( WT_IDL_IS_ZERO( a ) ) { + WT_IDL_CPY( a, b ); + return 0; + } + + ida = WT_IDL_LAST( a ); + idb = WT_IDL_LAST( b ); + if ( WT_IDL_IS_RANGE( a ) || WT_IDL_IS_RANGE(b) || + a[0] + b[0] >= WT_IDL_UM_MAX ) { + a[2] = IDL_MAX( ida, idb ); + a[1] = IDL_MIN( a[1], b[1] ); + a[0] = NOID; + return 0; + } + + if ( b[0] > 1 && ida > idb ) { + swap = idb; + a[a[0]] = idb; + b[b[0]] = ida; + } + + if ( b[1] < a[1] ) { + tmp = a[1]; + a[1] = b[1]; + } else { + tmp = b[1]; + } + a[0]++; + a[a[0]] = tmp; + + if ( b[0] > 1 ) { + int i = b[0] - 1; + AC_MEMCPY(a+a[0]+1, b+2, i * sizeof(ID)); + a[0] += i; + } + if ( swap ) { + b[b[0]] = swap; + } + return 0; +} + +#if 1 + +/* Quicksort + Insertion sort for small arrays */ + +#define SMALL 8 +#define SWAP(a,b) itmp=(a);(a)=(b);(b)=itmp + +void +wt_idl_sort( ID *ids, ID *tmp ) +{ + int *istack = (int *)tmp; /* Private stack, not used by caller */ + int i,j,k,l,ir,jstack; + ID a, itmp; + + if ( WT_IDL_IS_RANGE( ids )) + return; + + ir = ids[0]; + l = 1; + jstack = 0; + for(;;) { + if (ir - l < SMALL) { /* Insertion sort */ + for (j=l+1;j<=ir;j++) { + a = ids[j]; + for (i=j-1;i>=1;i--) { + if (ids[i] <= a) break; + ids[i+1] = ids[i]; + } + ids[i+1] = a; + } + if (jstack == 0) break; + ir = istack[jstack--]; + l = istack[jstack--]; + } else { + k = (l + ir) >> 1; /* Choose median of left, center, right */ + SWAP(ids[k], ids[l+1]); + if (ids[l] > ids[ir]) { + SWAP(ids[l], ids[ir]); + } + if (ids[l+1] > ids[ir]) { + SWAP(ids[l+1], ids[ir]); + } + if (ids[l] > ids[l+1]) { + SWAP(ids[l], ids[l+1]); + } + i = l+1; + j = ir; + a = ids[l+1]; + for(;;) { + do i++; while(ids[i] < a); + do j--; while(ids[j] > a); + if (j < i) break; + SWAP(ids[i],ids[j]); + } + ids[l+1] = ids[j]; + ids[j] = a; + jstack += 2; + if (ir-i+1 >= j-l) { + istack[jstack] = ir; + istack[jstack-1] = i; + ir = j-1; + } else { + istack[jstack] = j-1; + istack[jstack-1] = l; + l = i; + } + } + } +} + +#else + +/* 8 bit Radix sort + insertion sort + * + * based on code from http://www.cubic.org/docs/radix.htm + * with improvements by ebackes@symas.com and hyc@symas.com + * + * This code is O(n) but has a relatively high constant factor. For lists + * up to ~50 Quicksort is slightly faster; up to ~100 they are even. + * Much faster than quicksort for lists longer than ~100. Insertion + * sort is actually superior for lists <50. + */ + +#define BUCKETS (1<<8) +#define SMALL 50 + +void +wt_idl_sort( ID *ids, ID *tmp ) +{ + int count, soft_limit, phase = 0, size = ids[0]; + ID *idls[2]; + unsigned char *maxv = (unsigned char *)&ids[size]; + + if ( WT_IDL_IS_RANGE( ids )) + return; + + /* Use insertion sort for small lists */ + if ( size <= SMALL ) { + int i,j; + ID a; + + for (j=1;j<=size;j++) { + a = ids[j]; + for (i=j-1;i>=1;i--) { + if (ids[i] <= a) break; + ids[i+1] = ids[i]; + } + ids[i+1] = a; + } + return; + } + + tmp[0] = size; + idls[0] = ids; + idls[1] = tmp; + +#if BYTE_ORDER == BIG_ENDIAN + for (soft_limit = 0; !maxv[soft_limit]; soft_limit++); +#else + for (soft_limit = sizeof(ID)-1; !maxv[soft_limit]; soft_limit--); +#endif + + for ( +#if BYTE_ORDER == BIG_ENDIAN + count = sizeof(ID)-1; count >= soft_limit; --count +#else + count = 0; count <= soft_limit; ++count +#endif + ) { + unsigned int num[BUCKETS], * np, n, sum; + int i; + ID *sp, *source, *dest; + unsigned char *bp, *source_start; + + source = idls[phase]+1; + dest = idls[phase^1]+1; + source_start = ((unsigned char *) source) + count; + + np = num; + for ( i = BUCKETS; i > 0; --i ) *np++ = 0; + + /* count occurrences of every byte value */ + bp = source_start; + for ( i = size; i > 0; --i, bp += sizeof(ID) ) + num[*bp]++; + + /* transform count into index by summing elements and storing + * into same array + */ + sum = 0; + np = num; + for ( i = BUCKETS; i > 0; --i ) { + n = *np; + *np++ = sum; + sum += n; + } + + /* fill dest with the right values in the right place */ + bp = source_start; + sp = source; + for ( i = size; i > 0; --i, bp += sizeof(ID) ) { + np = num + *bp; + dest[*np] = *sp++; + ++(*np); + } + phase ^= 1; + } + + /* copy back from temp if needed */ + if ( phase ) { + ids++; tmp++; + for ( count = 0; count < size; ++count ) + *ids++ = *tmp++; + } +} +#endif /* Quick vs Radix */ + |