// Copyright (C) 2000 - 2002 Hewlett-Packard Company // // This program is free software; you can redistribute it and/or modify it // under the term of the GNU Lesser General Public License as published by the // Free Software Foundation; either version 2 of the License, or (at your // option) any later version. // // This program is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this program; if not, write to the Free Software Foundation, // Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // _________________ // @(#) $Revision: 4.78 $ $Source: /judy/src/JudyCommon/JudyCount.c $ // // Judy*Count() function for Judy1 and JudyL. // Compile with one of -DJUDY1 or -DJUDYL. // // Compile with -DNOSMARTJBB, -DNOSMARTJBU, and/or -DNOSMARTJLB to build a // version with cache line optimizations deleted, for testing. // // Compile with -DSMARTMETRICS to obtain global variables containing smart // cache line metrics. Note: Dont turn this on simultaneously for this file // and JudyByCount.c because they export the same globals. // // Judy*Count() returns the "count of Indexes" (inclusive) between the two // specified limits (Indexes). This code is remarkably fast. It traverses the // "Judy array" data structure. // // This count code is the GENERIC untuned version (minimum code size). It // might be possible to tuned to a specific architecture to be faster. // However, in real applications, with a modern machine, it is expected that // the instruction times will be swamped by cache line fills. // **************************************************************************** #if (! (defined(JUDY1) || defined(JUDYL))) #error: One of -DJUDY1 or -DJUDYL must be specified. #endif #ifdef JUDY1 #include "Judy1.h" #else #include "JudyL.h" #endif #include "JudyPrivate1L.h" // define a phoney that is for sure #define cJU_LEAFW cJU_JPIMMED_CAP // Avoid duplicate symbols since this file is multi-compiled: #ifdef SMARTMETRICS #ifdef JUDY1 Word_t jbb_upward = 0; // counts of directions taken: Word_t jbb_downward = 0; Word_t jbu_upward = 0; Word_t jbu_downward = 0; Word_t jlb_upward = 0; Word_t jlb_downward = 0; #else extern Word_t jbb_upward; extern Word_t jbb_downward; extern Word_t jbu_upward; extern Word_t jbu_downward; extern Word_t jlb_upward; extern Word_t jlb_downward; #endif #endif // FORWARD DECLARATIONS (prototypes): static Word_t j__udy1LCountSM(const Pjp_t Pjp, const Word_t Index, const Pjpm_t Pjpm); // Each of Judy1 and JudyL get their own private (static) version of this // function: static int j__udyCountLeafB1(const Pjll_t Pjll, const Word_t Pop1, const Word_t Index); // These functions are not static because they are exported to Judy*ByCount(): // // TBD: Should be made static for performance reasons? And thus duplicated? // // Note: There really are two different functions, but for convenience they // are referred to here with a generic name. #ifdef JUDY1 #define j__udyJPPop1 j__udy1JPPop1 #else #define j__udyJPPop1 j__udyLJPPop1 #endif Word_t j__udyJPPop1(const Pjp_t Pjp); // LOCAL ERROR HANDLING: // // The Judy*Count() functions are unusual because they return 0 instead of JERR // for an error. In this source file, define C_JERR for clarity. #define C_JERR 0 // **************************************************************************** // J U D Y 1 C O U N T // J U D Y L C O U N T // // See the manual entry for details. // // This code is written recursively, at least at first, because thats much // simpler; hope its fast enough. #ifdef JUDY1 FUNCTION Word_t Judy1Count #else FUNCTION Word_t JudyLCount #endif ( Pcvoid_t PArray, // JRP to first branch/leaf in SM. Word_t Index1, // starting Index. Word_t Index2, // ending Index. PJError_t PJError // optional, for returning error info. ) { jpm_t fakejpm; // local temporary for small arrays. Pjpm_t Pjpm; // top JPM or local temporary for error info. jp_t fakejp; // constructed for calling j__udy1LCountSM(). Pjp_t Pjp; // JP to pass to j__udy1LCountSM(). Word_t pop1; // total for the array. Word_t pop1above1; // indexes at or above Index1, inclusive. Word_t pop1above2; // indexes at or above Index2, exclusive. int retcode; // from Judy*First() calls. JUDYLCODE(PPvoid_t PPvalue); // from JudyLFirst() calls. // CHECK FOR SHORTCUTS: // // As documented, return C_JERR if the Judy array is empty or Index1 > Index2. if ((PArray == (Pvoid_t) NULL) || (Index1 > Index2)) { JU_SET_ERRNO(PJError, JU_ERRNO_NONE); return(C_JERR); } // If Index1 == Index2, simply check if the specified Index is set; pass // through the return value from Judy1Test() or JudyLGet() with appropriate // translations. if (Index1 == Index2) { #ifdef JUDY1 retcode = Judy1Test(PArray, Index1, PJError); if (retcode == JERRI) return(C_JERR); // pass through error. if (retcode == 0) { JU_SET_ERRNO(PJError, JU_ERRNO_NONE); return(C_JERR); } #else PPvalue = JudyLGet(PArray, Index1, PJError); if (PPvalue == PPJERR) return(C_JERR); // pass through error. if (PPvalue == (PPvoid_t) NULL) // Index is not set. { JU_SET_ERRNO(PJError, JU_ERRNO_NONE); return(C_JERR); } #endif return(1); // single index is set. } // CHECK JRP TYPE: // // Use an if/then for speed rather than a switch, and put the most common cases // first. // // Note: Since even cJU_LEAFW types require counting between two Indexes, // prepare them here for common code below that calls j__udy1LCountSM(), rather // than handling them even more specially here. if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW { Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. Pjpm = & fakejpm; Pjp = & fakejp; Pjp->jp_Addr = (Word_t) Pjlw; Pjp->jp_Type = cJU_LEAFW; Pjpm->jpm_Pop0 = Pjlw[0]; // from first word of leaf. pop1 = Pjpm->jpm_Pop0 + 1; } else { Pjpm = P_JPM(PArray); Pjp = &(Pjpm->jpm_JP); pop1 = (Pjpm->jpm_Pop0) + 1; // note: can roll over to 0. #if (defined(JUDY1) && (! defined(JU_64BIT))) if (pop1 == 0) // rare special case of full array: { Word_t count = Index2 - Index1 + 1; // can roll over again. if (count == 0) { JU_SET_ERRNO(PJError, JU_ERRNO_FULL); return(C_JERR); } return(count); } #else assert(pop1); // JudyL or 64-bit cannot create a full array! #endif } // COUNT POP1 ABOVE INDEX1, INCLUSIVE: assert(pop1); // just to be safe. if (Index1 == 0) // shortcut, pop1above1 is entire population: { pop1above1 = pop1; } else // find first valid Index above Index1, if any: { #ifdef JUDY1 if ((retcode = Judy1First(PArray, & Index1, PJError)) == JERRI) return(C_JERR); // pass through error. #else if ((PPvalue = JudyLFirst(PArray, & Index1, PJError)) == PPJERR) return(C_JERR); // pass through error. retcode = (PPvalue != (PPvoid_t) NULL); // found a next Index. #endif // If theres no Index at or above Index1, just return C_JERR (early exit): if (retcode == 0) { JU_SET_ERRNO(PJError, JU_ERRNO_NONE); return(C_JERR); } // If a first/next Index was found, call the counting motor starting with that // known valid Index, meaning the return should be positive, not C_JERR except // in case of a real error: if ((pop1above1 = j__udy1LCountSM(Pjp, Index1, Pjpm)) == C_JERR) { JU_COPY_ERRNO(PJError, Pjpm); // pass through error. return(C_JERR); } } // COUNT POP1 ABOVE INDEX2, EXCLUSIVE, AND RETURN THE DIFFERENCE: // // In principle, calculate the ordinal of each Index and take the difference, // with caution about off-by-one errors due to the specified Indexes being set // or unset. In practice: // // - The ordinals computed here are inverse ordinals, that is, the populations // ABOVE the specified Indexes (Index1 inclusive, Index2 exclusive), so // subtract pop1above2 from pop1above1, rather than vice-versa. // // - Index1s result already includes a count for Index1 and/or Index2 if // either is set, so calculate pop1above2 exclusive of Index2. // // TBD: If Index1 and Index2 fall in the same expanse in the top-state // branch(es), would it be faster to walk the SM only once, to their divergence // point, before calling j__udy1LCountSM() or equivalent? Possibly a non-issue // if a top-state pop1 becomes stored with each Judy1 array. Also, consider // whether the first call of j__udy1LCountSM() fills the cache, for common tree // branches, for the second call. // // As for pop1above1, look for shortcuts for special cases when pop1above2 is // zero. Otherwise call the counting "motor". assert(pop1above1); // just to be safe. if (Index2++ == cJU_ALLONES) return(pop1above1); // Index2 at limit. #ifdef JUDY1 if ((retcode = Judy1First(PArray, & Index2, PJError)) == JERRI) return(C_JERR); #else if ((PPvalue = JudyLFirst(PArray, & Index2, PJError)) == PPJERR) return(C_JERR); retcode = (PPvalue != (PPvoid_t) NULL); // found a next Index. #endif if (retcode == 0) return(pop1above1); // no Index above Index2. // Just as for Index1, j__udy1LCountSM() cannot return 0 (locally == C_JERR) // except in case of a real error: if ((pop1above2 = j__udy1LCountSM(Pjp, Index2, Pjpm)) == C_JERR) { JU_COPY_ERRNO(PJError, Pjpm); // pass through error. return(C_JERR); } if (pop1above1 == pop1above2) { JU_SET_ERRNO(PJError, JU_ERRNO_NONE); return(C_JERR); } return(pop1above1 - pop1above2); } // Judy1Count() / JudyLCount() // **************************************************************************** // __ J U D Y 1 L C O U N T S M // // Given a pointer to a JP (with invalid jp_DcdPopO at cJU_ROOTSTATE), a known // valid Index, and a Pjpm for returning error info, recursively visit a Judy // array state machine (SM) and return the count of Indexes, including Index, // through the end of the Judy array at this state or below. In case of error // or a count of 0 (should never happen), return C_JERR with appropriate // JU_ERRNO in the Pjpm. // // Note: This function is not told the current state because its encoded in // the JP Type. // // Method: To minimize cache line fills, while studying each branch, if Index // resides above the midpoint of the branch (which often consists of multiple // cache lines), ADD the populations at or above Index; otherwise, SUBTRACT // from the population of the WHOLE branch (available from the JP) the // populations at or above Index. This is especially tricky for bitmap // branches. // // Note: Unlike, say, the Ins and Del walk routines, this function returns the // same type of returns as Judy*Count(), so it can use *_SET_ERRNO*() macros // the same way. FUNCTION static Word_t j__udy1LCountSM( const Pjp_t Pjp, // top of Judy (sub)SM. const Word_t Index, // count at or above this Index. const Pjpm_t Pjpm) // for returning error info. { Pjbl_t Pjbl; // Pjp->jp_Addr masked and cast to types: Pjbb_t Pjbb; Pjbu_t Pjbu; Pjll_t Pjll; // a Judy lower-level linear leaf. Word_t digit; // next digit to decode from Index. long jpnum; // JP number in a branch (base 0). int offset; // index ordinal within a leaf, base 0. Word_t pop1; // total population of an expanse. Word_t pop1above; // to return. // Common code to check Decode bits in a JP against the equivalent portion of // Index; XOR together, then mask bits of interest; must be all 0: // // Note: Why does this code only assert() compliance rather than actively // checking for outliers? Its because Index is supposed to be valid, hence // always match any Dcd bits traversed. // // Note: This assertion turns out to be always true for cState = 3 on 32-bit // and 7 on 64-bit, but its harmless, probably removed by the compiler. #define CHECKDCD(Pjp,cState) \ assert(! JU_DCDNOTMATCHINDEX(Index, Pjp, cState)) // Common code to prepare to handle a root-level or lower-level branch: // Extract a state-dependent digit from Index in a "constant" way, obtain the // total population for the branch in a state-dependent way, and then branch to // common code for multiple cases: // // For root-level branches, the state is always cJU_ROOTSTATE, and the // population is received in Pjpm->jpm_Pop0. // // Note: The total population is only needed in cases where the common code // "counts up" instead of down to minimize cache line fills. However, its // available cheaply, and its better to do it with a constant shift (constant // state value) instead of a variable shift later "when needed". #define PREPB_ROOT(Pjp,Next) \ digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); \ pop1 = (Pjpm->jpm_Pop0) + 1; \ goto Next #define PREPB(Pjp,cState,Next) \ digit = JU_DIGITATSTATE(Index, cState); \ pop1 = JU_JPBRANCH_POP0(Pjp, (cState)) + 1; \ goto Next // SWITCH ON JP TYPE: // // WARNING: For run-time efficiency the following cases replicate code with // varying constants, rather than using common code with variable values! switch (JU_JPTYPE(Pjp)) { // ---------------------------------------------------------------------------- // ROOT-STATE LEAF that starts with a Pop0 word; just count within the leaf: case cJU_LEAFW: { Pjlw_t Pjlw = P_JLW(Pjp->jp_Addr); // first word of leaf. assert((Pjpm->jpm_Pop0) + 1 == Pjlw[0] + 1); // sent correctly. offset = j__udySearchLeafW(Pjlw + 1, Pjpm->jpm_Pop0 + 1, Index); assert(offset >= 0); // Index must exist. assert(offset < (Pjpm->jpm_Pop0) + 1); // Index be in range. return((Pjpm->jpm_Pop0) + 1 - offset); // INCLUSIVE of Index. } // ---------------------------------------------------------------------------- // LINEAR BRANCH; count populations in JPs in the JBL ABOVE the next digit in // Index, and recurse for the next digit in Index: // // Note: There are no null JPs in a JBL; watch out for pop1 == 0. // // Note: A JBL should always fit in one cache line => no need to count up // versus down to save cache line fills. (PREPB() sets pop1 for no reason.) case cJU_JPBRANCH_L2: CHECKDCD(Pjp, 2); PREPB(Pjp, 2, BranchL); case cJU_JPBRANCH_L3: CHECKDCD(Pjp, 3); PREPB(Pjp, 3, BranchL); #ifdef JU_64BIT case cJU_JPBRANCH_L4: CHECKDCD(Pjp, 4); PREPB(Pjp, 4, BranchL); case cJU_JPBRANCH_L5: CHECKDCD(Pjp, 5); PREPB(Pjp, 5, BranchL); case cJU_JPBRANCH_L6: CHECKDCD(Pjp, 6); PREPB(Pjp, 6, BranchL); case cJU_JPBRANCH_L7: CHECKDCD(Pjp, 7); PREPB(Pjp, 7, BranchL); #endif case cJU_JPBRANCH_L: PREPB_ROOT(Pjp, BranchL); // Common code (state-independent) for all cases of linear branches: BranchL: Pjbl = P_JBL(Pjp->jp_Addr); jpnum = Pjbl->jbl_NumJPs; // above last JP. pop1above = 0; while (digit < (Pjbl->jbl_Expanse[--jpnum])) // still ABOVE digit. { if ((pop1 = j__udyJPPop1((Pjbl->jbl_jp) + jpnum)) == cJU_ALLONES) { JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); return(C_JERR); } pop1above += pop1; assert(jpnum > 0); // should find digit. } assert(digit == (Pjbl->jbl_Expanse[jpnum])); // should find digit. pop1 = j__udy1LCountSM((Pjbl->jbl_jp) + jpnum, Index, Pjpm); if (pop1 == C_JERR) return(C_JERR); // pass error up. assert(pop1above + pop1); return(pop1above + pop1); // ---------------------------------------------------------------------------- // BITMAP BRANCH; count populations in JPs in the JBB ABOVE the next digit in // Index, and recurse for the next digit in Index: // // Note: There are no null JPs in a JBB; watch out for pop1 == 0. case cJU_JPBRANCH_B2: CHECKDCD(Pjp, 2); PREPB(Pjp, 2, BranchB); case cJU_JPBRANCH_B3: CHECKDCD(Pjp, 3); PREPB(Pjp, 3, BranchB); #ifdef JU_64BIT case cJU_JPBRANCH_B4: CHECKDCD(Pjp, 4); PREPB(Pjp, 4, BranchB); case cJU_JPBRANCH_B5: CHECKDCD(Pjp, 5); PREPB(Pjp, 5, BranchB); case cJU_JPBRANCH_B6: CHECKDCD(Pjp, 6); PREPB(Pjp, 6, BranchB); case cJU_JPBRANCH_B7: CHECKDCD(Pjp, 7); PREPB(Pjp, 7, BranchB); #endif case cJU_JPBRANCH_B: PREPB_ROOT(Pjp, BranchB); // Common code (state-independent) for all cases of bitmap branches: BranchB: { long subexp; // for stepping through layer 1 (subexpanses). long findsub; // subexpanse containing Index (digit). Word_t findbit; // bit representing Index (digit). Word_t lowermask; // bits for indexes at or below Index. Word_t jpcount; // JPs in a subexpanse. Word_t clbelow; // cache lines below digits cache line. Word_t clabove; // cache lines above digits cache line. Pjbb = P_JBB(Pjp->jp_Addr); findsub = digit / cJU_BITSPERSUBEXPB; findbit = digit % cJU_BITSPERSUBEXPB; lowermask = JU_MASKLOWERINC(JU_BITPOSMASKB(findbit)); clbelow = clabove = 0; // initial/default => always downward. assert(JU_BITMAPTESTB(Pjbb, digit)); // digit must have a JP. assert(findsub < cJU_NUMSUBEXPB); // falls in expected range. // Shorthand for one subexpanse in a bitmap and for one JP in a bitmap branch: // // Note: BMPJP0 exists separately to support assertions. #define BMPJP0(Subexp) (P_JP(JU_JBB_PJP(Pjbb, Subexp))) #define BMPJP(Subexp,JPnum) (BMPJP0(Subexp) + (JPnum)) #ifndef NOSMARTJBB // enable to turn off smart code for comparison purposes. // FIGURE OUT WHICH DIRECTION CAUSES FEWER CACHE LINE FILLS; adding the pop1s // in JPs above Indexs JP, or subtracting the pop1s in JPs below Indexs JP. // // This is tricky because, while each set bit in the bitmap represents a JP, // the JPs are scattered over cJU_NUMSUBEXPB subexpanses, each of which can // contain JPs packed into multiple cache lines, and this code must visit every // JP either BELOW or ABOVE the JP for Index. // // Number of cache lines required to hold a linear list of the given number of // JPs, assuming the first JP is at the start of a cache line or the JPs in // jpcount fit wholly within a single cache line, which is ensured by // JudyMalloc(): #define CLPERJPS(jpcount) \ ((((jpcount) * cJU_WORDSPERJP) + cJU_WORDSPERCL - 1) / cJU_WORDSPERCL) // Count cache lines below/above for each subexpanse: for (subexp = 0; subexp < cJU_NUMSUBEXPB; ++subexp) { jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp)); // When at the subexpanse containing Index (digit), add cache lines // below/above appropriately, excluding the cache line containing the JP for // Index itself: if (subexp < findsub) clbelow += CLPERJPS(jpcount); else if (subexp > findsub) clabove += CLPERJPS(jpcount); else // (subexp == findsub) { Word_t clfind; // cache line containing Index (digit). clfind = CLPERJPS(j__udyCountBitsB( JU_JBB_BITMAP(Pjbb, subexp) & lowermask)); assert(clfind > 0); // digit itself should have 1 CL. clbelow += clfind - 1; clabove += CLPERJPS(jpcount) - clfind; } } #endif // ! NOSMARTJBB // Note: Its impossible to get through the following "if" without setting // jpnum -- see some of the assertions below -- but gcc -Wall doesnt know // this, so preset jpnum to make it happy: jpnum = 0; // COUNT POPULATION FOR A BITMAP BRANCH, in whichever direction should result // in fewer cache line fills: // // Note: If the remainder of Index is zero, pop1above is the pop1 of the // entire expanse and theres no point in recursing to lower levels; but this // should be so rare that its not worth checking for; // Judy1Count()/JudyLCount() never even calls the motor for Index == 0 (all // bytes). // COUNT UPWARD, subtracting each "below or at" JPs pop1 from the whole // expanses pop1: // // Note: If this causes clbelow + 1 cache line fills including JPs cache // line, thats OK; at worst this is the same as clabove. if (clbelow < clabove) { #ifdef SMARTMETRICS ++jbb_upward; #endif pop1above = pop1; // subtract JPs at/below Index. // Count JPs for which to accrue pop1s in this subexpanse: // // TBD: If JU_JBB_BITMAP is cJU_FULLBITMAPB, dont bother counting. for (subexp = 0; subexp <= findsub; ++subexp) { jpcount = j__udyCountBitsB((subexp < findsub) ? JU_JBB_BITMAP(Pjbb, subexp) : JU_JBB_BITMAP(Pjbb, subexp) & lowermask); // should always find findbit: assert((subexp < findsub) || jpcount); // Subtract pop1s from JPs BELOW OR AT Index (digit): // // Note: The pop1 for Indexs JP itself is partially added back later at a // lower state. // // Note: An empty subexpanse (jpcount == 0) is handled "for free". // // Note: Must be null JP subexp pointer in empty subexpanse and non-empty in // non-empty subexpanse: assert( jpcount || (BMPJP0(subexp) == (Pjp_t) NULL)); assert((! jpcount) || (BMPJP0(subexp) != (Pjp_t) NULL)); for (jpnum = 0; jpnum < jpcount; ++jpnum) { if ((pop1 = j__udyJPPop1(BMPJP(subexp, jpnum))) == cJU_ALLONES) { JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); return(C_JERR); } pop1above -= pop1; } jpnum = jpcount - 1; // make correct for digit. } } // COUNT DOWNWARD, adding each "above" JPs pop1: else { long jpcountbf; // below findbit, inclusive. #ifdef SMARTMETRICS ++jbb_downward; #endif pop1above = 0; // add JPs above Index. jpcountbf = 0; // until subexp == findsub. // Count JPs for which to accrue pop1s in this subexpanse: // // This is more complicated than counting upward because the scan of digits // subexpanse must count ALL JPs, to know where to START counting down, and // ALSO note the offset of digits JP to know where to STOP counting down. for (subexp = cJU_NUMSUBEXPB - 1; subexp >= findsub; --subexp) { jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp)); // should always find findbit: assert((subexp > findsub) || jpcount); if (! jpcount) continue; // empty subexpanse, save time. // Count JPs below digit, inclusive: if (subexp == findsub) { jpcountbf = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp) & lowermask); } // should always find findbit: assert((subexp > findsub) || jpcountbf); assert(jpcount >= jpcountbf); // proper relationship. // Add pop1s from JPs ABOVE Index (digit): // no null JP subexp pointers: assert(BMPJP0(subexp) != (Pjp_t) NULL); for (jpnum = jpcount - 1; jpnum >= jpcountbf; --jpnum) { if ((pop1 = j__udyJPPop1(BMPJP(subexp, jpnum))) == cJU_ALLONES) { JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); return(C_JERR); } pop1above += pop1; } // jpnum is now correct for digit. } } // else. // Return the net population ABOVE the digits JP at this state (in this JBB) // plus the population AT OR ABOVE Index in the SM under the digits JP: pop1 = j__udy1LCountSM(BMPJP(findsub, jpnum), Index, Pjpm); if (pop1 == C_JERR) return(C_JERR); // pass error up. assert(pop1above + pop1); return(pop1above + pop1); } // case. // ---------------------------------------------------------------------------- // UNCOMPRESSED BRANCH; count populations in JPs in the JBU ABOVE the next // digit in Index, and recurse for the next digit in Index: // // Note: If the remainder of Index is zero, pop1above is the pop1 of the // entire expanse and theres no point in recursing to lower levels; but this // should be so rare that its not worth checking for; // Judy1Count()/JudyLCount() never even calls the motor for Index == 0 (all // bytes). case cJU_JPBRANCH_U2: CHECKDCD(Pjp, 2); PREPB(Pjp, 2, BranchU); case cJU_JPBRANCH_U3: CHECKDCD(Pjp, 3); PREPB(Pjp, 3, BranchU); #ifdef JU_64BIT case cJU_JPBRANCH_U4: CHECKDCD(Pjp, 4); PREPB(Pjp, 4, BranchU); case cJU_JPBRANCH_U5: CHECKDCD(Pjp, 5); PREPB(Pjp, 5, BranchU); case cJU_JPBRANCH_U6: CHECKDCD(Pjp, 6); PREPB(Pjp, 6, BranchU); case cJU_JPBRANCH_U7: CHECKDCD(Pjp, 7); PREPB(Pjp, 7, BranchU); #endif case cJU_JPBRANCH_U: PREPB_ROOT(Pjp, BranchU); // Common code (state-independent) for all cases of uncompressed branches: BranchU: Pjbu = P_JBU(Pjp->jp_Addr); #ifndef NOSMARTJBU // enable to turn off smart code for comparison purposes. // FIGURE OUT WHICH WAY CAUSES FEWER CACHE LINE FILLS; adding the JPs above // Indexs JP, or subtracting the JPs below Indexs JP. // // COUNT UPWARD, subtracting the pop1 of each JP BELOW OR AT Index, from the // whole expanses pop1: if (digit < (cJU_BRANCHUNUMJPS / 2)) { pop1above = pop1; // subtract JPs below Index. #ifdef SMARTMETRICS ++jbu_upward; #endif for (jpnum = 0; jpnum <= digit; ++jpnum) { if ((Pjbu->jbu_jp[jpnum].jp_Type) <= cJU_JPNULLMAX) continue; // shortcut, save a function call. if ((pop1 = j__udyJPPop1(Pjbu->jbu_jp + jpnum)) == cJU_ALLONES) { JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); return(C_JERR); } pop1above -= pop1; } } // COUNT DOWNWARD, simply adding the pop1 of each JP ABOVE Index: else #endif // NOSMARTJBU { assert(digit < cJU_BRANCHUNUMJPS); #ifdef SMARTMETRICS ++jbu_downward; #endif pop1above = 0; // add JPs above Index. for (jpnum = cJU_BRANCHUNUMJPS - 1; jpnum > digit; --jpnum) { if ((Pjbu->jbu_jp[jpnum].jp_Type) <= cJU_JPNULLMAX) continue; // shortcut, save a function call. if ((pop1 = j__udyJPPop1(Pjbu->jbu_jp + jpnum)) == cJU_ALLONES) { JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); return(C_JERR); } pop1above += pop1; } } if ((pop1 = j__udy1LCountSM(Pjbu->jbu_jp + digit, Index, Pjpm)) == C_JERR) return(C_JERR); // pass error up. assert(pop1above + pop1); return(pop1above + pop1); // ---------------------------------------------------------------------------- // LEAF COUNT MACROS: // // LEAF*ABOVE() are common code for different JP types (linear leaves, bitmap // leaves, and immediates) and different leaf Index Sizes, which result in // calling different leaf search functions. Linear leaves get the leaf address // from jp_Addr and the Population from jp_DcdPopO, while immediates use Pjp // itself as the leaf address and get Population from jp_Type. #define LEAFLABOVE(Func) \ Pjll = P_JLL(Pjp->jp_Addr); \ pop1 = JU_JPLEAF_POP0(Pjp) + 1; \ LEAFABOVE(Func, Pjll, pop1) #define LEAFB1ABOVE(Func) LEAFLABOVE(Func) // different Func, otherwise same. #ifdef JUDY1 #define IMMABOVE(Func,Pop1) \ Pjll = (Pjll_t) Pjp; \ LEAFABOVE(Func, Pjll, Pop1) #else // Note: For JudyL immediates with >= 2 Indexes, the index bytes are in a // different place than for Judy1: #define IMMABOVE(Func,Pop1) \ LEAFABOVE(Func, (Pjll_t) (Pjp->jp_LIndex), Pop1) #endif // For all leaf types, the population AT OR ABOVE is the total pop1 less the // offset of Index; and Index should always be found: #define LEAFABOVE(Func,Pjll,Pop1) \ offset = Func(Pjll, Pop1, Index); \ assert(offset >= 0); \ assert(offset < (Pop1)); \ return((Pop1) - offset) // IMMABOVE_01 handles the special case of an immediate JP with 1 index, which // the search functions arent used for anyway: // // The target Index should be the one in this Immediate, in which case the // count above (inclusive) is always 1. #define IMMABOVE_01 \ assert((JU_JPDCDPOP0(Pjp)) == JU_TRIMTODCDSIZE(Index)); \ return(1) // ---------------------------------------------------------------------------- // LINEAR LEAF; search the leaf for Index; size is computed from jp_Type: #if (defined(JUDYL) || (! defined(JU_64BIT))) case cJU_JPLEAF1: LEAFLABOVE(j__udySearchLeaf1); #endif case cJU_JPLEAF2: LEAFLABOVE(j__udySearchLeaf2); case cJU_JPLEAF3: LEAFLABOVE(j__udySearchLeaf3); #ifdef JU_64BIT case cJU_JPLEAF4: LEAFLABOVE(j__udySearchLeaf4); case cJU_JPLEAF5: LEAFLABOVE(j__udySearchLeaf5); case cJU_JPLEAF6: LEAFLABOVE(j__udySearchLeaf6); case cJU_JPLEAF7: LEAFLABOVE(j__udySearchLeaf7); #endif // ---------------------------------------------------------------------------- // BITMAP LEAF; search the leaf for Index: // // Since the bitmap describes Indexes digitally rather than linearly, this is // not really a search, but just a count. case cJU_JPLEAF_B1: LEAFB1ABOVE(j__udyCountLeafB1); #ifdef JUDY1 // ---------------------------------------------------------------------------- // FULL POPULATION: // // Return the count of Indexes AT OR ABOVE Index, which is the total population // of the expanse (a constant) less the value of the undecoded digit remaining // in Index (its base-0 offset in the expanse), which yields an inclusive count // above. // // TBD: This only supports a 1-byte full expanse. Should this extract a // stored value for pop0 and possibly more LSBs of Index, to handle larger full // expanses? case cJ1_JPFULLPOPU1: return(cJU_JPFULLPOPU1_POP0 + 1 - JU_DIGITATSTATE(Index, 1)); #endif // ---------------------------------------------------------------------------- // IMMEDIATE: case cJU_JPIMMED_1_01: IMMABOVE_01; case cJU_JPIMMED_2_01: IMMABOVE_01; case cJU_JPIMMED_3_01: IMMABOVE_01; #ifdef JU_64BIT case cJU_JPIMMED_4_01: IMMABOVE_01; case cJU_JPIMMED_5_01: IMMABOVE_01; case cJU_JPIMMED_6_01: IMMABOVE_01; case cJU_JPIMMED_7_01: IMMABOVE_01; #endif case cJU_JPIMMED_1_02: IMMABOVE(j__udySearchLeaf1, 2); case cJU_JPIMMED_1_03: IMMABOVE(j__udySearchLeaf1, 3); #if (defined(JUDY1) || defined(JU_64BIT)) case cJU_JPIMMED_1_04: IMMABOVE(j__udySearchLeaf1, 4); case cJU_JPIMMED_1_05: IMMABOVE(j__udySearchLeaf1, 5); case cJU_JPIMMED_1_06: IMMABOVE(j__udySearchLeaf1, 6); case cJU_JPIMMED_1_07: IMMABOVE(j__udySearchLeaf1, 7); #endif #if (defined(JUDY1) && defined(JU_64BIT)) case cJ1_JPIMMED_1_08: IMMABOVE(j__udySearchLeaf1, 8); case cJ1_JPIMMED_1_09: IMMABOVE(j__udySearchLeaf1, 9); case cJ1_JPIMMED_1_10: IMMABOVE(j__udySearchLeaf1, 10); case cJ1_JPIMMED_1_11: IMMABOVE(j__udySearchLeaf1, 11); case cJ1_JPIMMED_1_12: IMMABOVE(j__udySearchLeaf1, 12); case cJ1_JPIMMED_1_13: IMMABOVE(j__udySearchLeaf1, 13); case cJ1_JPIMMED_1_14: IMMABOVE(j__udySearchLeaf1, 14); case cJ1_JPIMMED_1_15: IMMABOVE(j__udySearchLeaf1, 15); #endif #if (defined(JUDY1) || defined(JU_64BIT)) case cJU_JPIMMED_2_02: IMMABOVE(j__udySearchLeaf2, 2); case cJU_JPIMMED_2_03: IMMABOVE(j__udySearchLeaf2, 3); #endif #if (defined(JUDY1) && defined(JU_64BIT)) case cJ1_JPIMMED_2_04: IMMABOVE(j__udySearchLeaf2, 4); case cJ1_JPIMMED_2_05: IMMABOVE(j__udySearchLeaf2, 5); case cJ1_JPIMMED_2_06: IMMABOVE(j__udySearchLeaf2, 6); case cJ1_JPIMMED_2_07: IMMABOVE(j__udySearchLeaf2, 7); #endif #if (defined(JUDY1) || defined(JU_64BIT)) case cJU_JPIMMED_3_02: IMMABOVE(j__udySearchLeaf3, 2); #endif #if (defined(JUDY1) && defined(JU_64BIT)) case cJ1_JPIMMED_3_03: IMMABOVE(j__udySearchLeaf3, 3); case cJ1_JPIMMED_3_04: IMMABOVE(j__udySearchLeaf3, 4); case cJ1_JPIMMED_3_05: IMMABOVE(j__udySearchLeaf3, 5); case cJ1_JPIMMED_4_02: IMMABOVE(j__udySearchLeaf4, 2); case cJ1_JPIMMED_4_03: IMMABOVE(j__udySearchLeaf4, 3); case cJ1_JPIMMED_5_02: IMMABOVE(j__udySearchLeaf5, 2); case cJ1_JPIMMED_5_03: IMMABOVE(j__udySearchLeaf5, 3); case cJ1_JPIMMED_6_02: IMMABOVE(j__udySearchLeaf6, 2); case cJ1_JPIMMED_7_02: IMMABOVE(j__udySearchLeaf7, 2); #endif // ---------------------------------------------------------------------------- // OTHER CASES: default: JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); return(C_JERR); } // switch on JP type /*NOTREACHED*/ } // j__udy1LCountSM() // **************************************************************************** // J U D Y C O U N T L E A F B 1 // // This is a private analog of the j__udySearchLeaf*() functions for counting // in bitmap 1-byte leaves. Since a bitmap leaf describes Indexes digitally // rather than linearly, this is not really a search, but just a count of the // valid Indexes == set bits below or including Index, which should be valid. // Return the "offset" (really the ordinal), 0 .. Pop1 - 1, of Index in Pjll; // if Indexs bit is not set (which should never happen, so this is DEBUG-mode // only), return the 1s-complement equivalent (== negative offset minus 1). // // Note: The source code for this function looks identical for both Judy1 and // JudyL, but the JU_JLB_BITMAP macro varies. // // Note: For simpler calling, the first arg is of type Pjll_t but then cast to // Pjlb_t. FUNCTION static int j__udyCountLeafB1( const Pjll_t Pjll, // bitmap leaf, as Pjll_t for consistency. const Word_t Pop1, // Population of whole leaf. const Word_t Index) // to which to count. { Pjlb_t Pjlb = (Pjlb_t) Pjll; // to proper type. Word_t digit = Index & cJU_MASKATSTATE(1); Word_t findsub = digit / cJU_BITSPERSUBEXPL; Word_t findbit = digit % cJU_BITSPERSUBEXPL; int count; // in leaf through Index. long subexp; // for stepping through subexpanses. // COUNT UPWARD: // // The entire bitmap should fit in one cache line, but still try to save some // CPU time by counting the fewest possible number of subexpanses from the // bitmap. #ifndef NOSMARTJLB // enable to turn off smart code for comparison purposes. if (findsub < (cJU_NUMSUBEXPL / 2)) { #ifdef SMARTMETRICS ++jlb_upward; #endif count = 0; for (subexp = 0; subexp < findsub; ++subexp) { count += ((JU_JLB_BITMAP(Pjlb, subexp) == cJU_FULLBITMAPL) ? cJU_BITSPERSUBEXPL : j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp))); } // This count includes findbit, which should be set, resulting in a base-1 // offset: count += j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, findsub) & JU_MASKLOWERINC(JU_BITPOSMASKL(findbit))); DBGCODE(if (! JU_BITMAPTESTL(Pjlb, digit)) return(~count);) assert(count >= 1); return(count - 1); // convert to base-0 offset. } #endif // NOSMARTJLB // COUNT DOWNWARD: // // Count the valid Indexes above or at Index, and subtract from Pop1. #ifdef SMARTMETRICS ++jlb_downward; #endif count = Pop1; // base-1 for now. for (subexp = cJU_NUMSUBEXPL - 1; subexp > findsub; --subexp) { count -= ((JU_JLB_BITMAP(Pjlb, subexp) == cJU_FULLBITMAPL) ? cJU_BITSPERSUBEXPL : j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp))); } // This count includes findbit, which should be set, resulting in a base-0 // offset: count -= j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, findsub) & JU_MASKHIGHERINC(JU_BITPOSMASKL(findbit))); DBGCODE(if (! JU_BITMAPTESTL(Pjlb, digit)) return(~count);) assert(count >= 0); // should find Index itself. return(count); // is already a base-0 offset. } // j__udyCountLeafB1() // **************************************************************************** // J U D Y J P P O P 1 // // This function takes any type of JP other than a root-level JP (cJU_LEAFW* or // cJU_JPBRANCH* with no number suffix) and extracts the Pop1 from it. In some // sense this is a wrapper around the JU_JP*_POP0 macros. Why write it as a // function instead of a complex macro containing a trinary? (See version // Judy1.h version 4.17.) We think its cheaper to call a function containing // a switch statement with "constant" cases than to do the variable // calculations in a trinary. // // For invalid JP Types return cJU_ALLONES. Note that this is an impossibly // high Pop1 for any JP below a top level branch. FUNCTION Word_t j__udyJPPop1( const Pjp_t Pjp) // JP to count. { switch (JU_JPTYPE(Pjp)) { #ifdef notdef // caller should shortcut and not even call with these: case cJU_JPNULL1: case cJU_JPNULL2: case cJU_JPNULL3: return(0); #ifdef JU_64BIT case cJU_JPNULL4: case cJU_JPNULL5: case cJU_JPNULL6: case cJU_JPNULL7: return(0); #endif #endif // notdef case cJU_JPBRANCH_L2: case cJU_JPBRANCH_B2: case cJU_JPBRANCH_U2: return(JU_JPBRANCH_POP0(Pjp,2) + 1); case cJU_JPBRANCH_L3: case cJU_JPBRANCH_B3: case cJU_JPBRANCH_U3: return(JU_JPBRANCH_POP0(Pjp,3) + 1); #ifdef JU_64BIT case cJU_JPBRANCH_L4: case cJU_JPBRANCH_B4: case cJU_JPBRANCH_U4: return(JU_JPBRANCH_POP0(Pjp,4) + 1); case cJU_JPBRANCH_L5: case cJU_JPBRANCH_B5: case cJU_JPBRANCH_U5: return(JU_JPBRANCH_POP0(Pjp,5) + 1); case cJU_JPBRANCH_L6: case cJU_JPBRANCH_B6: case cJU_JPBRANCH_U6: return(JU_JPBRANCH_POP0(Pjp,6) + 1); case cJU_JPBRANCH_L7: case cJU_JPBRANCH_B7: case cJU_JPBRANCH_U7: return(JU_JPBRANCH_POP0(Pjp,7) + 1); #endif #if (defined(JUDYL) || (! defined(JU_64BIT))) case cJU_JPLEAF1: #endif case cJU_JPLEAF2: case cJU_JPLEAF3: #ifdef JU_64BIT case cJU_JPLEAF4: case cJU_JPLEAF5: case cJU_JPLEAF6: case cJU_JPLEAF7: #endif case cJU_JPLEAF_B1: return(JU_JPLEAF_POP0(Pjp) + 1); #ifdef JUDY1 case cJ1_JPFULLPOPU1: return(cJU_JPFULLPOPU1_POP0 + 1); #endif case cJU_JPIMMED_1_01: case cJU_JPIMMED_2_01: case cJU_JPIMMED_3_01: return(1); #ifdef JU_64BIT case cJU_JPIMMED_4_01: case cJU_JPIMMED_5_01: case cJU_JPIMMED_6_01: case cJU_JPIMMED_7_01: return(1); #endif case cJU_JPIMMED_1_02: return(2); case cJU_JPIMMED_1_03: return(3); #if (defined(JUDY1) || defined(JU_64BIT)) case cJU_JPIMMED_1_04: return(4); case cJU_JPIMMED_1_05: return(5); case cJU_JPIMMED_1_06: return(6); case cJU_JPIMMED_1_07: return(7); #endif #if (defined(JUDY1) && defined(JU_64BIT)) case cJ1_JPIMMED_1_08: return(8); case cJ1_JPIMMED_1_09: return(9); case cJ1_JPIMMED_1_10: return(10); case cJ1_JPIMMED_1_11: return(11); case cJ1_JPIMMED_1_12: return(12); case cJ1_JPIMMED_1_13: return(13); case cJ1_JPIMMED_1_14: return(14); case cJ1_JPIMMED_1_15: return(15); #endif #if (defined(JUDY1) || defined(JU_64BIT)) case cJU_JPIMMED_2_02: return(2); case cJU_JPIMMED_2_03: return(3); #endif #if (defined(JUDY1) && defined(JU_64BIT)) case cJ1_JPIMMED_2_04: return(4); case cJ1_JPIMMED_2_05: return(5); case cJ1_JPIMMED_2_06: return(6); case cJ1_JPIMMED_2_07: return(7); #endif #if (defined(JUDY1) || defined(JU_64BIT)) case cJU_JPIMMED_3_02: return(2); #endif #if (defined(JUDY1) && defined(JU_64BIT)) case cJ1_JPIMMED_3_03: return(3); case cJ1_JPIMMED_3_04: return(4); case cJ1_JPIMMED_3_05: return(5); case cJ1_JPIMMED_4_02: return(2); case cJ1_JPIMMED_4_03: return(3); case cJ1_JPIMMED_5_02: return(2); case cJ1_JPIMMED_5_03: return(3); case cJ1_JPIMMED_6_02: return(2); case cJ1_JPIMMED_7_02: return(2); #endif default: return(cJU_ALLONES); } /*NOTREACHED*/ } // j__udyJPPop1()