diff options
Diffstat (limited to '')
-rw-r--r-- | libnetdata/libjudy/src/JudyL/JudyLCount.c | 1195 |
1 files changed, 1195 insertions, 0 deletions
diff --git a/libnetdata/libjudy/src/JudyL/JudyLCount.c b/libnetdata/libjudy/src/JudyL/JudyLCount.c new file mode 100644 index 0000000..179757f --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLCount.c @@ -0,0 +1,1195 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.78 $ $Source: /judy/src/JudyCommon/JudyCount.c $ +// +// Judy*Count() function for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// +// Compile with -DNOSMARTJBB, -DNOSMARTJBU, and/or -DNOSMARTJLB to build a +// version with cache line optimizations deleted, for testing. +// +// Compile with -DSMARTMETRICS to obtain global variables containing smart +// cache line metrics. Note: Dont turn this on simultaneously for this file +// and JudyByCount.c because they export the same globals. +// +// Judy*Count() returns the "count of Indexes" (inclusive) between the two +// specified limits (Indexes). This code is remarkably fast. It traverses the +// "Judy array" data structure. +// +// This count code is the GENERIC untuned version (minimum code size). It +// might be possible to tuned to a specific architecture to be faster. +// However, in real applications, with a modern machine, it is expected that +// the instruction times will be swamped by cache line fills. +// **************************************************************************** + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + + +// define a phoney that is for sure + +#define cJU_LEAFW cJU_JPIMMED_CAP + +// Avoid duplicate symbols since this file is multi-compiled: + +#ifdef SMARTMETRICS +#ifdef JUDY1 +Word_t jbb_upward = 0; // counts of directions taken: +Word_t jbb_downward = 0; +Word_t jbu_upward = 0; +Word_t jbu_downward = 0; +Word_t jlb_upward = 0; +Word_t jlb_downward = 0; +#else +extern Word_t jbb_upward; +extern Word_t jbb_downward; +extern Word_t jbu_upward; +extern Word_t jbu_downward; +extern Word_t jlb_upward; +extern Word_t jlb_downward; +#endif +#endif + + +// FORWARD DECLARATIONS (prototypes): + +static Word_t j__udy1LCountSM(const Pjp_t Pjp, const Word_t Index, + const Pjpm_t Pjpm); + +// Each of Judy1 and JudyL get their own private (static) version of this +// function: + +static int j__udyCountLeafB1(const Pjll_t Pjll, const Word_t Pop1, + const Word_t Index); + +// These functions are not static because they are exported to Judy*ByCount(): +// +// TBD: Should be made static for performance reasons? And thus duplicated? +// +// Note: There really are two different functions, but for convenience they +// are referred to here with a generic name. + +#ifdef JUDY1 +#define j__udyJPPop1 j__udy1JPPop1 +#else +#define j__udyJPPop1 j__udyLJPPop1 +#endif + +Word_t j__udyJPPop1(const Pjp_t Pjp); + + +// LOCAL ERROR HANDLING: +// +// The Judy*Count() functions are unusual because they return 0 instead of JERR +// for an error. In this source file, define C_JERR for clarity. + +#define C_JERR 0 + + +// **************************************************************************** +// J U D Y 1 C O U N T +// J U D Y L C O U N T +// +// See the manual entry for details. +// +// This code is written recursively, at least at first, because thats much +// simpler; hope its fast enough. + +#ifdef JUDY1 +FUNCTION Word_t Judy1Count +#else +FUNCTION Word_t JudyLCount +#endif + ( + Pcvoid_t PArray, // JRP to first branch/leaf in SM. + Word_t Index1, // starting Index. + Word_t Index2, // ending Index. + PJError_t PJError // optional, for returning error info. + ) +{ + jpm_t fakejpm; // local temporary for small arrays. + Pjpm_t Pjpm; // top JPM or local temporary for error info. + jp_t fakejp; // constructed for calling j__udy1LCountSM(). + Pjp_t Pjp; // JP to pass to j__udy1LCountSM(). + Word_t pop1; // total for the array. + Word_t pop1above1; // indexes at or above Index1, inclusive. + Word_t pop1above2; // indexes at or above Index2, exclusive. + int retcode; // from Judy*First() calls. +JUDYLCODE(PPvoid_t PPvalue); // from JudyLFirst() calls. + + +// CHECK FOR SHORTCUTS: +// +// As documented, return C_JERR if the Judy array is empty or Index1 > Index2. + + if ((PArray == (Pvoid_t) NULL) || (Index1 > Index2)) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NONE); + return(C_JERR); + } + +// If Index1 == Index2, simply check if the specified Index is set; pass +// through the return value from Judy1Test() or JudyLGet() with appropriate +// translations. + + if (Index1 == Index2) + { +#ifdef JUDY1 + retcode = Judy1Test(PArray, Index1, PJError); + + if (retcode == JERRI) return(C_JERR); // pass through error. + + if (retcode == 0) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NONE); + return(C_JERR); + } +#else + PPvalue = JudyLGet(PArray, Index1, PJError); + + if (PPvalue == PPJERR) return(C_JERR); // pass through error. + + if (PPvalue == (PPvoid_t) NULL) // Index is not set. + { + JU_SET_ERRNO(PJError, JU_ERRNO_NONE); + return(C_JERR); + } +#endif + return(1); // single index is set. + } + + +// CHECK JRP TYPE: +// +// Use an if/then for speed rather than a switch, and put the most common cases +// first. +// +// Note: Since even cJU_LEAFW types require counting between two Indexes, +// prepare them here for common code below that calls j__udy1LCountSM(), rather +// than handling them even more specially here. + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + Pjpm = & fakejpm; + Pjp = & fakejp; + Pjp->jp_Addr = (Word_t) Pjlw; + Pjp->jp_Type = cJU_LEAFW; + Pjpm->jpm_Pop0 = Pjlw[0]; // from first word of leaf. + pop1 = Pjpm->jpm_Pop0 + 1; + } + else + { + Pjpm = P_JPM(PArray); + Pjp = &(Pjpm->jpm_JP); + pop1 = (Pjpm->jpm_Pop0) + 1; // note: can roll over to 0. + +#if (defined(JUDY1) && (! defined(JU_64BIT))) + if (pop1 == 0) // rare special case of full array: + { + Word_t count = Index2 - Index1 + 1; // can roll over again. + + if (count == 0) + { + JU_SET_ERRNO(PJError, JU_ERRNO_FULL); + return(C_JERR); + } + return(count); + } +#else + assert(pop1); // JudyL or 64-bit cannot create a full array! +#endif + } + + +// COUNT POP1 ABOVE INDEX1, INCLUSIVE: + + assert(pop1); // just to be safe. + + if (Index1 == 0) // shortcut, pop1above1 is entire population: + { + pop1above1 = pop1; + } + else // find first valid Index above Index1, if any: + { +#ifdef JUDY1 + if ((retcode = Judy1First(PArray, & Index1, PJError)) == JERRI) + return(C_JERR); // pass through error. +#else + if ((PPvalue = JudyLFirst(PArray, & Index1, PJError)) == PPJERR) + return(C_JERR); // pass through error. + + retcode = (PPvalue != (PPvoid_t) NULL); // found a next Index. +#endif + +// If theres no Index at or above Index1, just return C_JERR (early exit): + + if (retcode == 0) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NONE); + return(C_JERR); + } + +// If a first/next Index was found, call the counting motor starting with that +// known valid Index, meaning the return should be positive, not C_JERR except +// in case of a real error: + + if ((pop1above1 = j__udy1LCountSM(Pjp, Index1, Pjpm)) == C_JERR) + { + JU_COPY_ERRNO(PJError, Pjpm); // pass through error. + return(C_JERR); + } + } + + +// COUNT POP1 ABOVE INDEX2, EXCLUSIVE, AND RETURN THE DIFFERENCE: +// +// In principle, calculate the ordinal of each Index and take the difference, +// with caution about off-by-one errors due to the specified Indexes being set +// or unset. In practice: +// +// - The ordinals computed here are inverse ordinals, that is, the populations +// ABOVE the specified Indexes (Index1 inclusive, Index2 exclusive), so +// subtract pop1above2 from pop1above1, rather than vice-versa. +// +// - Index1s result already includes a count for Index1 and/or Index2 if +// either is set, so calculate pop1above2 exclusive of Index2. +// +// TBD: If Index1 and Index2 fall in the same expanse in the top-state +// branch(es), would it be faster to walk the SM only once, to their divergence +// point, before calling j__udy1LCountSM() or equivalent? Possibly a non-issue +// if a top-state pop1 becomes stored with each Judy1 array. Also, consider +// whether the first call of j__udy1LCountSM() fills the cache, for common tree +// branches, for the second call. +// +// As for pop1above1, look for shortcuts for special cases when pop1above2 is +// zero. Otherwise call the counting "motor". + + assert(pop1above1); // just to be safe. + + if (Index2++ == cJU_ALLONES) return(pop1above1); // Index2 at limit. + +#ifdef JUDY1 + if ((retcode = Judy1First(PArray, & Index2, PJError)) == JERRI) + return(C_JERR); +#else + if ((PPvalue = JudyLFirst(PArray, & Index2, PJError)) == PPJERR) + return(C_JERR); + + retcode = (PPvalue != (PPvoid_t) NULL); // found a next Index. +#endif + if (retcode == 0) return(pop1above1); // no Index above Index2. + +// Just as for Index1, j__udy1LCountSM() cannot return 0 (locally == C_JERR) +// except in case of a real error: + + if ((pop1above2 = j__udy1LCountSM(Pjp, Index2, Pjpm)) == C_JERR) + { + JU_COPY_ERRNO(PJError, Pjpm); // pass through error. + return(C_JERR); + } + + if (pop1above1 == pop1above2) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NONE); + return(C_JERR); + } + + return(pop1above1 - pop1above2); + +} // Judy1Count() / JudyLCount() + + +// **************************************************************************** +// __ J U D Y 1 L C O U N T S M +// +// Given a pointer to a JP (with invalid jp_DcdPopO at cJU_ROOTSTATE), a known +// valid Index, and a Pjpm for returning error info, recursively visit a Judy +// array state machine (SM) and return the count of Indexes, including Index, +// through the end of the Judy array at this state or below. In case of error +// or a count of 0 (should never happen), return C_JERR with appropriate +// JU_ERRNO in the Pjpm. +// +// Note: This function is not told the current state because its encoded in +// the JP Type. +// +// Method: To minimize cache line fills, while studying each branch, if Index +// resides above the midpoint of the branch (which often consists of multiple +// cache lines), ADD the populations at or above Index; otherwise, SUBTRACT +// from the population of the WHOLE branch (available from the JP) the +// populations at or above Index. This is especially tricky for bitmap +// branches. +// +// Note: Unlike, say, the Ins and Del walk routines, this function returns the +// same type of returns as Judy*Count(), so it can use *_SET_ERRNO*() macros +// the same way. + +FUNCTION static Word_t j__udy1LCountSM( +const Pjp_t Pjp, // top of Judy (sub)SM. +const Word_t Index, // count at or above this Index. +const Pjpm_t Pjpm) // for returning error info. +{ + Pjbl_t Pjbl; // Pjp->jp_Addr masked and cast to types: + Pjbb_t Pjbb; + Pjbu_t Pjbu; + Pjll_t Pjll; // a Judy lower-level linear leaf. + + Word_t digit; // next digit to decode from Index. + long jpnum; // JP number in a branch (base 0). + int offset; // index ordinal within a leaf, base 0. + Word_t pop1; // total population of an expanse. + Word_t pop1above; // to return. + +// Common code to check Decode bits in a JP against the equivalent portion of +// Index; XOR together, then mask bits of interest; must be all 0: +// +// Note: Why does this code only assert() compliance rather than actively +// checking for outliers? Its because Index is supposed to be valid, hence +// always match any Dcd bits traversed. +// +// Note: This assertion turns out to be always true for cState = 3 on 32-bit +// and 7 on 64-bit, but its harmless, probably removed by the compiler. + +#define CHECKDCD(Pjp,cState) \ + assert(! JU_DCDNOTMATCHINDEX(Index, Pjp, cState)) + +// Common code to prepare to handle a root-level or lower-level branch: +// Extract a state-dependent digit from Index in a "constant" way, obtain the +// total population for the branch in a state-dependent way, and then branch to +// common code for multiple cases: +// +// For root-level branches, the state is always cJU_ROOTSTATE, and the +// population is received in Pjpm->jpm_Pop0. +// +// Note: The total population is only needed in cases where the common code +// "counts up" instead of down to minimize cache line fills. However, its +// available cheaply, and its better to do it with a constant shift (constant +// state value) instead of a variable shift later "when needed". + +#define PREPB_ROOT(Pjp,Next) \ + digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); \ + pop1 = (Pjpm->jpm_Pop0) + 1; \ + goto Next + +#define PREPB(Pjp,cState,Next) \ + digit = JU_DIGITATSTATE(Index, cState); \ + pop1 = JU_JPBRANCH_POP0(Pjp, (cState)) + 1; \ + goto Next + + +// SWITCH ON JP TYPE: +// +// WARNING: For run-time efficiency the following cases replicate code with +// varying constants, rather than using common code with variable values! + + switch (JU_JPTYPE(Pjp)) + { + + +// ---------------------------------------------------------------------------- +// ROOT-STATE LEAF that starts with a Pop0 word; just count within the leaf: + + case cJU_LEAFW: + { + Pjlw_t Pjlw = P_JLW(Pjp->jp_Addr); // first word of leaf. + + assert((Pjpm->jpm_Pop0) + 1 == Pjlw[0] + 1); // sent correctly. + offset = j__udySearchLeafW(Pjlw + 1, Pjpm->jpm_Pop0 + 1, Index); + assert(offset >= 0); // Index must exist. + assert(offset < (Pjpm->jpm_Pop0) + 1); // Index be in range. + return((Pjpm->jpm_Pop0) + 1 - offset); // INCLUSIVE of Index. + } + +// ---------------------------------------------------------------------------- +// LINEAR BRANCH; count populations in JPs in the JBL ABOVE the next digit in +// Index, and recurse for the next digit in Index: +// +// Note: There are no null JPs in a JBL; watch out for pop1 == 0. +// +// Note: A JBL should always fit in one cache line => no need to count up +// versus down to save cache line fills. (PREPB() sets pop1 for no reason.) + + case cJU_JPBRANCH_L2: CHECKDCD(Pjp, 2); PREPB(Pjp, 2, BranchL); + case cJU_JPBRANCH_L3: CHECKDCD(Pjp, 3); PREPB(Pjp, 3, BranchL); + +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: CHECKDCD(Pjp, 4); PREPB(Pjp, 4, BranchL); + case cJU_JPBRANCH_L5: CHECKDCD(Pjp, 5); PREPB(Pjp, 5, BranchL); + case cJU_JPBRANCH_L6: CHECKDCD(Pjp, 6); PREPB(Pjp, 6, BranchL); + case cJU_JPBRANCH_L7: CHECKDCD(Pjp, 7); PREPB(Pjp, 7, BranchL); +#endif + case cJU_JPBRANCH_L: PREPB_ROOT(Pjp, BranchL); + +// Common code (state-independent) for all cases of linear branches: + +BranchL: + + Pjbl = P_JBL(Pjp->jp_Addr); + jpnum = Pjbl->jbl_NumJPs; // above last JP. + pop1above = 0; + + while (digit < (Pjbl->jbl_Expanse[--jpnum])) // still ABOVE digit. + { + if ((pop1 = j__udyJPPop1((Pjbl->jbl_jp) + jpnum)) == cJU_ALLONES) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(C_JERR); + } + + pop1above += pop1; + assert(jpnum > 0); // should find digit. + } + + assert(digit == (Pjbl->jbl_Expanse[jpnum])); // should find digit. + + pop1 = j__udy1LCountSM((Pjbl->jbl_jp) + jpnum, Index, Pjpm); + if (pop1 == C_JERR) return(C_JERR); // pass error up. + + assert(pop1above + pop1); + return(pop1above + pop1); + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH; count populations in JPs in the JBB ABOVE the next digit in +// Index, and recurse for the next digit in Index: +// +// Note: There are no null JPs in a JBB; watch out for pop1 == 0. + + case cJU_JPBRANCH_B2: CHECKDCD(Pjp, 2); PREPB(Pjp, 2, BranchB); + case cJU_JPBRANCH_B3: CHECKDCD(Pjp, 3); PREPB(Pjp, 3, BranchB); +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: CHECKDCD(Pjp, 4); PREPB(Pjp, 4, BranchB); + case cJU_JPBRANCH_B5: CHECKDCD(Pjp, 5); PREPB(Pjp, 5, BranchB); + case cJU_JPBRANCH_B6: CHECKDCD(Pjp, 6); PREPB(Pjp, 6, BranchB); + case cJU_JPBRANCH_B7: CHECKDCD(Pjp, 7); PREPB(Pjp, 7, BranchB); +#endif + case cJU_JPBRANCH_B: PREPB_ROOT(Pjp, BranchB); + +// Common code (state-independent) for all cases of bitmap branches: + +BranchB: + { + long subexp; // for stepping through layer 1 (subexpanses). + long findsub; // subexpanse containing Index (digit). + Word_t findbit; // bit representing Index (digit). + Word_t lowermask; // bits for indexes at or below Index. + Word_t jpcount; // JPs in a subexpanse. + Word_t clbelow; // cache lines below digits cache line. + Word_t clabove; // cache lines above digits cache line. + + Pjbb = P_JBB(Pjp->jp_Addr); + findsub = digit / cJU_BITSPERSUBEXPB; + findbit = digit % cJU_BITSPERSUBEXPB; + lowermask = JU_MASKLOWERINC(JU_BITPOSMASKB(findbit)); + clbelow = clabove = 0; // initial/default => always downward. + + assert(JU_BITMAPTESTB(Pjbb, digit)); // digit must have a JP. + assert(findsub < cJU_NUMSUBEXPB); // falls in expected range. + +// Shorthand for one subexpanse in a bitmap and for one JP in a bitmap branch: +// +// Note: BMPJP0 exists separately to support assertions. + +#define BMPJP0(Subexp) (P_JP(JU_JBB_PJP(Pjbb, Subexp))) +#define BMPJP(Subexp,JPnum) (BMPJP0(Subexp) + (JPnum)) + +#ifndef NOSMARTJBB // enable to turn off smart code for comparison purposes. + +// FIGURE OUT WHICH DIRECTION CAUSES FEWER CACHE LINE FILLS; adding the pop1s +// in JPs above Indexs JP, or subtracting the pop1s in JPs below Indexs JP. +// +// This is tricky because, while each set bit in the bitmap represents a JP, +// the JPs are scattered over cJU_NUMSUBEXPB subexpanses, each of which can +// contain JPs packed into multiple cache lines, and this code must visit every +// JP either BELOW or ABOVE the JP for Index. +// +// Number of cache lines required to hold a linear list of the given number of +// JPs, assuming the first JP is at the start of a cache line or the JPs in +// jpcount fit wholly within a single cache line, which is ensured by +// JudyMalloc(): + +#define CLPERJPS(jpcount) \ + ((((jpcount) * cJU_WORDSPERJP) + cJU_WORDSPERCL - 1) / cJU_WORDSPERCL) + +// Count cache lines below/above for each subexpanse: + + for (subexp = 0; subexp < cJU_NUMSUBEXPB; ++subexp) + { + jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp)); + +// When at the subexpanse containing Index (digit), add cache lines +// below/above appropriately, excluding the cache line containing the JP for +// Index itself: + + if (subexp < findsub) clbelow += CLPERJPS(jpcount); + else if (subexp > findsub) clabove += CLPERJPS(jpcount); + else // (subexp == findsub) + { + Word_t clfind; // cache line containing Index (digit). + + clfind = CLPERJPS(j__udyCountBitsB( + JU_JBB_BITMAP(Pjbb, subexp) & lowermask)); + + assert(clfind > 0); // digit itself should have 1 CL. + clbelow += clfind - 1; + clabove += CLPERJPS(jpcount) - clfind; + } + } +#endif // ! NOSMARTJBB + +// Note: Its impossible to get through the following "if" without setting +// jpnum -- see some of the assertions below -- but gcc -Wall doesnt know +// this, so preset jpnum to make it happy: + + jpnum = 0; + + +// COUNT POPULATION FOR A BITMAP BRANCH, in whichever direction should result +// in fewer cache line fills: +// +// Note: If the remainder of Index is zero, pop1above is the pop1 of the +// entire expanse and theres no point in recursing to lower levels; but this +// should be so rare that its not worth checking for; +// Judy1Count()/JudyLCount() never even calls the motor for Index == 0 (all +// bytes). + + +// COUNT UPWARD, subtracting each "below or at" JPs pop1 from the whole +// expanses pop1: +// +// Note: If this causes clbelow + 1 cache line fills including JPs cache +// line, thats OK; at worst this is the same as clabove. + + if (clbelow < clabove) + { +#ifdef SMARTMETRICS + ++jbb_upward; +#endif + pop1above = pop1; // subtract JPs at/below Index. + +// Count JPs for which to accrue pop1s in this subexpanse: +// +// TBD: If JU_JBB_BITMAP is cJU_FULLBITMAPB, dont bother counting. + + for (subexp = 0; subexp <= findsub; ++subexp) + { + jpcount = j__udyCountBitsB((subexp < findsub) ? + JU_JBB_BITMAP(Pjbb, subexp) : + JU_JBB_BITMAP(Pjbb, subexp) & lowermask); + + // should always find findbit: + assert((subexp < findsub) || jpcount); + +// Subtract pop1s from JPs BELOW OR AT Index (digit): +// +// Note: The pop1 for Indexs JP itself is partially added back later at a +// lower state. +// +// Note: An empty subexpanse (jpcount == 0) is handled "for free". +// +// Note: Must be null JP subexp pointer in empty subexpanse and non-empty in +// non-empty subexpanse: + + assert( jpcount || (BMPJP0(subexp) == (Pjp_t) NULL)); + assert((! jpcount) || (BMPJP0(subexp) != (Pjp_t) NULL)); + + for (jpnum = 0; jpnum < jpcount; ++jpnum) + { + if ((pop1 = j__udyJPPop1(BMPJP(subexp, jpnum))) + == cJU_ALLONES) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(C_JERR); + } + + pop1above -= pop1; + } + + jpnum = jpcount - 1; // make correct for digit. + } + } + +// COUNT DOWNWARD, adding each "above" JPs pop1: + + else + { + long jpcountbf; // below findbit, inclusive. +#ifdef SMARTMETRICS + ++jbb_downward; +#endif + pop1above = 0; // add JPs above Index. + jpcountbf = 0; // until subexp == findsub. + +// Count JPs for which to accrue pop1s in this subexpanse: +// +// This is more complicated than counting upward because the scan of digits +// subexpanse must count ALL JPs, to know where to START counting down, and +// ALSO note the offset of digits JP to know where to STOP counting down. + + for (subexp = cJU_NUMSUBEXPB - 1; subexp >= findsub; --subexp) + { + jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp)); + + // should always find findbit: + assert((subexp > findsub) || jpcount); + + if (! jpcount) continue; // empty subexpanse, save time. + +// Count JPs below digit, inclusive: + + if (subexp == findsub) + { + jpcountbf = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp) + & lowermask); + } + + // should always find findbit: + assert((subexp > findsub) || jpcountbf); + assert(jpcount >= jpcountbf); // proper relationship. + +// Add pop1s from JPs ABOVE Index (digit): + + // no null JP subexp pointers: + assert(BMPJP0(subexp) != (Pjp_t) NULL); + + for (jpnum = jpcount - 1; jpnum >= jpcountbf; --jpnum) + { + if ((pop1 = j__udyJPPop1(BMPJP(subexp, jpnum))) + == cJU_ALLONES) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(C_JERR); + } + + pop1above += pop1; + } + // jpnum is now correct for digit. + } + } // else. + +// Return the net population ABOVE the digits JP at this state (in this JBB) +// plus the population AT OR ABOVE Index in the SM under the digits JP: + + pop1 = j__udy1LCountSM(BMPJP(findsub, jpnum), Index, Pjpm); + if (pop1 == C_JERR) return(C_JERR); // pass error up. + + assert(pop1above + pop1); + return(pop1above + pop1); + + } // case. + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH; count populations in JPs in the JBU ABOVE the next +// digit in Index, and recurse for the next digit in Index: +// +// Note: If the remainder of Index is zero, pop1above is the pop1 of the +// entire expanse and theres no point in recursing to lower levels; but this +// should be so rare that its not worth checking for; +// Judy1Count()/JudyLCount() never even calls the motor for Index == 0 (all +// bytes). + + case cJU_JPBRANCH_U2: CHECKDCD(Pjp, 2); PREPB(Pjp, 2, BranchU); + case cJU_JPBRANCH_U3: CHECKDCD(Pjp, 3); PREPB(Pjp, 3, BranchU); +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: CHECKDCD(Pjp, 4); PREPB(Pjp, 4, BranchU); + case cJU_JPBRANCH_U5: CHECKDCD(Pjp, 5); PREPB(Pjp, 5, BranchU); + case cJU_JPBRANCH_U6: CHECKDCD(Pjp, 6); PREPB(Pjp, 6, BranchU); + case cJU_JPBRANCH_U7: CHECKDCD(Pjp, 7); PREPB(Pjp, 7, BranchU); +#endif + case cJU_JPBRANCH_U: PREPB_ROOT(Pjp, BranchU); + +// Common code (state-independent) for all cases of uncompressed branches: + +BranchU: + Pjbu = P_JBU(Pjp->jp_Addr); + +#ifndef NOSMARTJBU // enable to turn off smart code for comparison purposes. + +// FIGURE OUT WHICH WAY CAUSES FEWER CACHE LINE FILLS; adding the JPs above +// Indexs JP, or subtracting the JPs below Indexs JP. +// +// COUNT UPWARD, subtracting the pop1 of each JP BELOW OR AT Index, from the +// whole expanses pop1: + + if (digit < (cJU_BRANCHUNUMJPS / 2)) + { + pop1above = pop1; // subtract JPs below Index. +#ifdef SMARTMETRICS + ++jbu_upward; +#endif + for (jpnum = 0; jpnum <= digit; ++jpnum) + { + if ((Pjbu->jbu_jp[jpnum].jp_Type) <= cJU_JPNULLMAX) + continue; // shortcut, save a function call. + + if ((pop1 = j__udyJPPop1(Pjbu->jbu_jp + jpnum)) + == cJU_ALLONES) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(C_JERR); + } + + pop1above -= pop1; + } + } + +// COUNT DOWNWARD, simply adding the pop1 of each JP ABOVE Index: + + else +#endif // NOSMARTJBU + { + assert(digit < cJU_BRANCHUNUMJPS); +#ifdef SMARTMETRICS + ++jbu_downward; +#endif + pop1above = 0; // add JPs above Index. + + for (jpnum = cJU_BRANCHUNUMJPS - 1; jpnum > digit; --jpnum) + { + if ((Pjbu->jbu_jp[jpnum].jp_Type) <= cJU_JPNULLMAX) + continue; // shortcut, save a function call. + + if ((pop1 = j__udyJPPop1(Pjbu->jbu_jp + jpnum)) + == cJU_ALLONES) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(C_JERR); + } + + pop1above += pop1; + } + } + + if ((pop1 = j__udy1LCountSM(Pjbu->jbu_jp + digit, Index, Pjpm)) + == C_JERR) return(C_JERR); // pass error up. + + assert(pop1above + pop1); + return(pop1above + pop1); + + +// ---------------------------------------------------------------------------- +// LEAF COUNT MACROS: +// +// LEAF*ABOVE() are common code for different JP types (linear leaves, bitmap +// leaves, and immediates) and different leaf Index Sizes, which result in +// calling different leaf search functions. Linear leaves get the leaf address +// from jp_Addr and the Population from jp_DcdPopO, while immediates use Pjp +// itself as the leaf address and get Population from jp_Type. + +#define LEAFLABOVE(Func) \ + Pjll = P_JLL(Pjp->jp_Addr); \ + pop1 = JU_JPLEAF_POP0(Pjp) + 1; \ + LEAFABOVE(Func, Pjll, pop1) + +#define LEAFB1ABOVE(Func) LEAFLABOVE(Func) // different Func, otherwise same. + +#ifdef JUDY1 +#define IMMABOVE(Func,Pop1) \ + Pjll = (Pjll_t) Pjp; \ + LEAFABOVE(Func, Pjll, Pop1) +#else +// Note: For JudyL immediates with >= 2 Indexes, the index bytes are in a +// different place than for Judy1: + +#define IMMABOVE(Func,Pop1) \ + LEAFABOVE(Func, (Pjll_t) (Pjp->jp_LIndex), Pop1) +#endif + +// For all leaf types, the population AT OR ABOVE is the total pop1 less the +// offset of Index; and Index should always be found: + +#define LEAFABOVE(Func,Pjll,Pop1) \ + offset = Func(Pjll, Pop1, Index); \ + assert(offset >= 0); \ + assert(offset < (Pop1)); \ + return((Pop1) - offset) + +// IMMABOVE_01 handles the special case of an immediate JP with 1 index, which +// the search functions arent used for anyway: +// +// The target Index should be the one in this Immediate, in which case the +// count above (inclusive) is always 1. + +#define IMMABOVE_01 \ + assert((JU_JPDCDPOP0(Pjp)) == JU_TRIMTODCDSIZE(Index)); \ + return(1) + + +// ---------------------------------------------------------------------------- +// LINEAR LEAF; search the leaf for Index; size is computed from jp_Type: + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: LEAFLABOVE(j__udySearchLeaf1); +#endif + case cJU_JPLEAF2: LEAFLABOVE(j__udySearchLeaf2); + case cJU_JPLEAF3: LEAFLABOVE(j__udySearchLeaf3); + +#ifdef JU_64BIT + case cJU_JPLEAF4: LEAFLABOVE(j__udySearchLeaf4); + case cJU_JPLEAF5: LEAFLABOVE(j__udySearchLeaf5); + case cJU_JPLEAF6: LEAFLABOVE(j__udySearchLeaf6); + case cJU_JPLEAF7: LEAFLABOVE(j__udySearchLeaf7); +#endif + + +// ---------------------------------------------------------------------------- +// BITMAP LEAF; search the leaf for Index: +// +// Since the bitmap describes Indexes digitally rather than linearly, this is +// not really a search, but just a count. + + case cJU_JPLEAF_B1: LEAFB1ABOVE(j__udyCountLeafB1); + + +#ifdef JUDY1 +// ---------------------------------------------------------------------------- +// FULL POPULATION: +// +// Return the count of Indexes AT OR ABOVE Index, which is the total population +// of the expanse (a constant) less the value of the undecoded digit remaining +// in Index (its base-0 offset in the expanse), which yields an inclusive count +// above. +// +// TBD: This only supports a 1-byte full expanse. Should this extract a +// stored value for pop0 and possibly more LSBs of Index, to handle larger full +// expanses? + + case cJ1_JPFULLPOPU1: + return(cJU_JPFULLPOPU1_POP0 + 1 - JU_DIGITATSTATE(Index, 1)); +#endif + + +// ---------------------------------------------------------------------------- +// IMMEDIATE: + + case cJU_JPIMMED_1_01: IMMABOVE_01; + case cJU_JPIMMED_2_01: IMMABOVE_01; + case cJU_JPIMMED_3_01: IMMABOVE_01; +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: IMMABOVE_01; + case cJU_JPIMMED_5_01: IMMABOVE_01; + case cJU_JPIMMED_6_01: IMMABOVE_01; + case cJU_JPIMMED_7_01: IMMABOVE_01; +#endif + + case cJU_JPIMMED_1_02: IMMABOVE(j__udySearchLeaf1, 2); + case cJU_JPIMMED_1_03: IMMABOVE(j__udySearchLeaf1, 3); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: IMMABOVE(j__udySearchLeaf1, 4); + case cJU_JPIMMED_1_05: IMMABOVE(j__udySearchLeaf1, 5); + case cJU_JPIMMED_1_06: IMMABOVE(j__udySearchLeaf1, 6); + case cJU_JPIMMED_1_07: IMMABOVE(j__udySearchLeaf1, 7); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: IMMABOVE(j__udySearchLeaf1, 8); + case cJ1_JPIMMED_1_09: IMMABOVE(j__udySearchLeaf1, 9); + case cJ1_JPIMMED_1_10: IMMABOVE(j__udySearchLeaf1, 10); + case cJ1_JPIMMED_1_11: IMMABOVE(j__udySearchLeaf1, 11); + case cJ1_JPIMMED_1_12: IMMABOVE(j__udySearchLeaf1, 12); + case cJ1_JPIMMED_1_13: IMMABOVE(j__udySearchLeaf1, 13); + case cJ1_JPIMMED_1_14: IMMABOVE(j__udySearchLeaf1, 14); + case cJ1_JPIMMED_1_15: IMMABOVE(j__udySearchLeaf1, 15); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: IMMABOVE(j__udySearchLeaf2, 2); + case cJU_JPIMMED_2_03: IMMABOVE(j__udySearchLeaf2, 3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: IMMABOVE(j__udySearchLeaf2, 4); + case cJ1_JPIMMED_2_05: IMMABOVE(j__udySearchLeaf2, 5); + case cJ1_JPIMMED_2_06: IMMABOVE(j__udySearchLeaf2, 6); + case cJ1_JPIMMED_2_07: IMMABOVE(j__udySearchLeaf2, 7); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: IMMABOVE(j__udySearchLeaf3, 2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: IMMABOVE(j__udySearchLeaf3, 3); + case cJ1_JPIMMED_3_04: IMMABOVE(j__udySearchLeaf3, 4); + case cJ1_JPIMMED_3_05: IMMABOVE(j__udySearchLeaf3, 5); + + case cJ1_JPIMMED_4_02: IMMABOVE(j__udySearchLeaf4, 2); + case cJ1_JPIMMED_4_03: IMMABOVE(j__udySearchLeaf4, 3); + + case cJ1_JPIMMED_5_02: IMMABOVE(j__udySearchLeaf5, 2); + case cJ1_JPIMMED_5_03: IMMABOVE(j__udySearchLeaf5, 3); + + case cJ1_JPIMMED_6_02: IMMABOVE(j__udySearchLeaf6, 2); + + case cJ1_JPIMMED_7_02: IMMABOVE(j__udySearchLeaf7, 2); +#endif + + +// ---------------------------------------------------------------------------- +// OTHER CASES: + + default: JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); return(C_JERR); + + } // switch on JP type + + /*NOTREACHED*/ + +} // j__udy1LCountSM() + + +// **************************************************************************** +// J U D Y C O U N T L E A F B 1 +// +// This is a private analog of the j__udySearchLeaf*() functions for counting +// in bitmap 1-byte leaves. Since a bitmap leaf describes Indexes digitally +// rather than linearly, this is not really a search, but just a count of the +// valid Indexes == set bits below or including Index, which should be valid. +// Return the "offset" (really the ordinal), 0 .. Pop1 - 1, of Index in Pjll; +// if Indexs bit is not set (which should never happen, so this is DEBUG-mode +// only), return the 1s-complement equivalent (== negative offset minus 1). +// +// Note: The source code for this function looks identical for both Judy1 and +// JudyL, but the JU_JLB_BITMAP macro varies. +// +// Note: For simpler calling, the first arg is of type Pjll_t but then cast to +// Pjlb_t. + +FUNCTION static int j__udyCountLeafB1( +const Pjll_t Pjll, // bitmap leaf, as Pjll_t for consistency. +const Word_t Pop1, // Population of whole leaf. +const Word_t Index) // to which to count. +{ + Pjlb_t Pjlb = (Pjlb_t) Pjll; // to proper type. + Word_t digit = Index & cJU_MASKATSTATE(1); + Word_t findsub = digit / cJU_BITSPERSUBEXPL; + Word_t findbit = digit % cJU_BITSPERSUBEXPL; + int count; // in leaf through Index. + long subexp; // for stepping through subexpanses. + + +// COUNT UPWARD: +// +// The entire bitmap should fit in one cache line, but still try to save some +// CPU time by counting the fewest possible number of subexpanses from the +// bitmap. + +#ifndef NOSMARTJLB // enable to turn off smart code for comparison purposes. + + if (findsub < (cJU_NUMSUBEXPL / 2)) + { +#ifdef SMARTMETRICS + ++jlb_upward; +#endif + count = 0; + + for (subexp = 0; subexp < findsub; ++subexp) + { + count += ((JU_JLB_BITMAP(Pjlb, subexp) == cJU_FULLBITMAPL) ? + cJU_BITSPERSUBEXPL : + j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp))); + } + +// This count includes findbit, which should be set, resulting in a base-1 +// offset: + + count += j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, findsub) + & JU_MASKLOWERINC(JU_BITPOSMASKL(findbit))); + + DBGCODE(if (! JU_BITMAPTESTL(Pjlb, digit)) return(~count);) + assert(count >= 1); + return(count - 1); // convert to base-0 offset. + } +#endif // NOSMARTJLB + + +// COUNT DOWNWARD: +// +// Count the valid Indexes above or at Index, and subtract from Pop1. + +#ifdef SMARTMETRICS + ++jlb_downward; +#endif + count = Pop1; // base-1 for now. + + for (subexp = cJU_NUMSUBEXPL - 1; subexp > findsub; --subexp) + { + count -= ((JU_JLB_BITMAP(Pjlb, subexp) == cJU_FULLBITMAPL) ? + cJU_BITSPERSUBEXPL : + j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp))); + } + +// This count includes findbit, which should be set, resulting in a base-0 +// offset: + + count -= j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, findsub) + & JU_MASKHIGHERINC(JU_BITPOSMASKL(findbit))); + + DBGCODE(if (! JU_BITMAPTESTL(Pjlb, digit)) return(~count);) + assert(count >= 0); // should find Index itself. + return(count); // is already a base-0 offset. + +} // j__udyCountLeafB1() + + +// **************************************************************************** +// J U D Y J P P O P 1 +// +// This function takes any type of JP other than a root-level JP (cJU_LEAFW* or +// cJU_JPBRANCH* with no number suffix) and extracts the Pop1 from it. In some +// sense this is a wrapper around the JU_JP*_POP0 macros. Why write it as a +// function instead of a complex macro containing a trinary? (See version +// Judy1.h version 4.17.) We think its cheaper to call a function containing +// a switch statement with "constant" cases than to do the variable +// calculations in a trinary. +// +// For invalid JP Types return cJU_ALLONES. Note that this is an impossibly +// high Pop1 for any JP below a top level branch. + +FUNCTION Word_t j__udyJPPop1( +const Pjp_t Pjp) // JP to count. +{ + switch (JU_JPTYPE(Pjp)) + { +#ifdef notdef // caller should shortcut and not even call with these: + + case cJU_JPNULL1: + case cJU_JPNULL2: + case cJU_JPNULL3: return(0); +#ifdef JU_64BIT + case cJU_JPNULL4: + case cJU_JPNULL5: + case cJU_JPNULL6: + case cJU_JPNULL7: return(0); +#endif +#endif // notdef + + case cJU_JPBRANCH_L2: + case cJU_JPBRANCH_B2: + case cJU_JPBRANCH_U2: return(JU_JPBRANCH_POP0(Pjp,2) + 1); + + case cJU_JPBRANCH_L3: + case cJU_JPBRANCH_B3: + case cJU_JPBRANCH_U3: return(JU_JPBRANCH_POP0(Pjp,3) + 1); + +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: + case cJU_JPBRANCH_B4: + case cJU_JPBRANCH_U4: return(JU_JPBRANCH_POP0(Pjp,4) + 1); + + case cJU_JPBRANCH_L5: + case cJU_JPBRANCH_B5: + case cJU_JPBRANCH_U5: return(JU_JPBRANCH_POP0(Pjp,5) + 1); + + case cJU_JPBRANCH_L6: + case cJU_JPBRANCH_B6: + case cJU_JPBRANCH_U6: return(JU_JPBRANCH_POP0(Pjp,6) + 1); + + case cJU_JPBRANCH_L7: + case cJU_JPBRANCH_B7: + case cJU_JPBRANCH_U7: return(JU_JPBRANCH_POP0(Pjp,7) + 1); +#endif + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: +#endif + case cJU_JPLEAF2: + case cJU_JPLEAF3: +#ifdef JU_64BIT + case cJU_JPLEAF4: + case cJU_JPLEAF5: + case cJU_JPLEAF6: + case cJU_JPLEAF7: +#endif + case cJU_JPLEAF_B1: return(JU_JPLEAF_POP0(Pjp) + 1); + +#ifdef JUDY1 + case cJ1_JPFULLPOPU1: return(cJU_JPFULLPOPU1_POP0 + 1); +#endif + + case cJU_JPIMMED_1_01: + case cJU_JPIMMED_2_01: + case cJU_JPIMMED_3_01: return(1); +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: + case cJU_JPIMMED_5_01: + case cJU_JPIMMED_6_01: + case cJU_JPIMMED_7_01: return(1); +#endif + + case cJU_JPIMMED_1_02: return(2); + case cJU_JPIMMED_1_03: return(3); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: return(4); + case cJU_JPIMMED_1_05: return(5); + case cJU_JPIMMED_1_06: return(6); + case cJU_JPIMMED_1_07: return(7); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: return(8); + case cJ1_JPIMMED_1_09: return(9); + case cJ1_JPIMMED_1_10: return(10); + case cJ1_JPIMMED_1_11: return(11); + case cJ1_JPIMMED_1_12: return(12); + case cJ1_JPIMMED_1_13: return(13); + case cJ1_JPIMMED_1_14: return(14); + case cJ1_JPIMMED_1_15: return(15); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: return(2); + case cJU_JPIMMED_2_03: return(3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: return(4); + case cJ1_JPIMMED_2_05: return(5); + case cJ1_JPIMMED_2_06: return(6); + case cJ1_JPIMMED_2_07: return(7); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: return(2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: return(3); + case cJ1_JPIMMED_3_04: return(4); + case cJ1_JPIMMED_3_05: return(5); + + case cJ1_JPIMMED_4_02: return(2); + case cJ1_JPIMMED_4_03: return(3); + + case cJ1_JPIMMED_5_02: return(2); + case cJ1_JPIMMED_5_03: return(3); + + case cJ1_JPIMMED_6_02: return(2); + + case cJ1_JPIMMED_7_02: return(2); +#endif + + default: return(cJU_ALLONES); + } + + /*NOTREACHED*/ + +} // j__udyJPPop1() |