diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
commit | 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch) | |
tree | a94efe259b9009378be6d90eb30d2b019d95c194 /lib/zstd/fse_compress.c | |
parent | Initial commit. (diff) | |
download | linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip |
Adding upstream version 5.10.209.upstream/5.10.209upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/zstd/fse_compress.c')
-rw-r--r-- | lib/zstd/fse_compress.c | 795 |
1 files changed, 795 insertions, 0 deletions
diff --git a/lib/zstd/fse_compress.c b/lib/zstd/fse_compress.c new file mode 100644 index 000000000..ef3d1741d --- /dev/null +++ b/lib/zstd/fse_compress.c @@ -0,0 +1,795 @@ +/* + * FSE : Finite State Entropy encoder + * Copyright (C) 2013-2015, Yann Collet. + * + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. This program is dual-licensed; you may select + * either version 2 of the GNU General Public License ("GPL") or BSD license + * ("BSD"). + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#define FORCE_INLINE static __always_inline + +/* ************************************************************** +* Includes +****************************************************************/ +#include "bitstream.h" +#include "fse.h" +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/math64.h> +#include <linux/string.h> /* memcpy, memset */ + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_STATIC_ASSERT(c) \ + { \ + enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ + } /* use only *after* variable declarations */ + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +#error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +#error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X, Y) X##Y +#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y) +#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y) + +/* Function templates */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` + * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements + */ +size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize) +{ + U32 const tableSize = 1 << tableLog; + U32 const tableMask = tableSize - 1; + void *const ptr = ct; + U16 *const tableU16 = ((U16 *)ptr) + 2; + void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1); + FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + U32 highThreshold = tableSize - 1; + + U32 *cumul; + FSE_FUNCTION_TYPE *tableSymbol; + size_t spaceUsed32 = 0; + + cumul = (U32 *)workspace + spaceUsed32; + spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2; + tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32); + spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2; + + if ((spaceUsed32 << 2) > workspaceSize) + return ERROR(tableLog_tooLarge); + workspace = (U32 *)workspace + spaceUsed32; + workspaceSize -= (spaceUsed32 << 2); + + /* CTable header */ + tableU16[-2] = (U16)tableLog; + tableU16[-1] = (U16)maxSymbolValue; + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + /* symbol start positions */ + { + U32 u; + cumul[0] = 0; + for (u = 1; u <= maxSymbolValue + 1; u++) { + if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */ + cumul[u] = cumul[u - 1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1); + } else { + cumul[u] = cumul[u - 1] + normalizedCounter[u - 1]; + } + } + cumul[maxSymbolValue + 1] = tableSize + 1; + } + + /* Spread symbols */ + { + U32 position = 0; + U32 symbol; + for (symbol = 0; symbol <= maxSymbolValue; symbol++) { + int nbOccurences; + for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) { + tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; + position = (position + step) & tableMask; + while (position > highThreshold) + position = (position + step) & tableMask; /* Low proba area */ + } + } + + if (position != 0) + return ERROR(GENERIC); /* Must have gone through all positions */ + } + + /* Build table */ + { + U32 u; + for (u = 0; u < tableSize; u++) { + FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ + tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */ + } + } + + /* Build Symbol Transformation Table */ + { + unsigned total = 0; + unsigned s; + for (s = 0; s <= maxSymbolValue; s++) { + switch (normalizedCounter[s]) { + case 0: break; + + case -1: + case 1: + symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog); + symbolTT[s].deltaFindState = total - 1; + total++; + break; + default: { + U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1); + U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; + symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; + symbolTT[s].deltaFindState = total - normalizedCounter[s]; + total += normalizedCounter[s]; + } + } + } + } + + return 0; +} + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) +{ + size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ +} + +static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE *const ostart = (BYTE *)header; + BYTE *out = ostart; + BYTE *const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + bitStream = 0; + bitCount = 0; + /* Table Size */ + bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize + 1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog + 1; + + while (remaining > 1) { /* stops at 1 */ + if (previous0) { + unsigned start = charnum; + while (!normalizedCounter[charnum]) + charnum++; + while (charnum >= start + 24) { + start += 24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream >> 8); + out += 2; + bitStream >>= 16; + } + while (charnum >= start + 3) { + start += 3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (charnum - start) << bitCount; + bitCount += 2; + if (bitCount > 16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream >> 8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } + } + { + int count = normalizedCounter[charnum++]; + int const max = (2 * threshold - 1) - remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count >= threshold) + count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count < max); + previous0 = (count == 1); + if (remaining < 1) + return ERROR(GENERIC); + while (remaining < threshold) + nbBits--, threshold >>= 1; + } + if (bitCount > 16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream >> 8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } + } + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream >> 8); + out += (bitCount + 7) / 8; + + if (charnum > maxSymbolValue + 1) + return ERROR(GENERIC); + + return (out - ostart); +} + +size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) + return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) + return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); +} + +/*-************************************************************** +* Counting histogram +****************************************************************/ +/*! FSE_count_simple + This function counts byte values within `src`, and store the histogram into table `count`. + It doesn't use any additional memory. + But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. + For this reason, prefer using a table `count` with 256 elements. + @return : count of most numerous element +*/ +size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize) +{ + const BYTE *ip = (const BYTE *)src; + const BYTE *const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max = 0; + + memset(count, 0, (maxSymbolValue + 1) * sizeof(*count)); + if (srcSize == 0) { + *maxSymbolValuePtr = 0; + return 0; + } + + while (ip < end) + count[*ip++]++; + + while (!count[maxSymbolValue]) + maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + + { + U32 s; + for (s = 0; s <= maxSymbolValue; s++) + if (count[s] > max) + max = count[s]; + } + + return (size_t)max; +} + +/* FSE_count_parallel_wksp() : + * Same as FSE_count_parallel(), but using an externally provided scratch buffer. + * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ +static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax, + unsigned *const workSpace) +{ + const BYTE *ip = (const BYTE *)source; + const BYTE *const iend = ip + sourceSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max = 0; + U32 *const Counting1 = workSpace; + U32 *const Counting2 = Counting1 + 256; + U32 *const Counting3 = Counting2 + 256; + U32 *const Counting4 = Counting3 + 256; + + memset(Counting1, 0, 4 * 256 * sizeof(unsigned)); + + /* safety checks */ + if (!sourceSize) { + memset(count, 0, maxSymbolValue + 1); + *maxSymbolValuePtr = 0; + return 0; + } + if (!maxSymbolValue) + maxSymbolValue = 255; /* 0 == default */ + + /* by stripes of 16 bytes */ + { + U32 cached = ZSTD_read32(ip); + ip += 4; + while (ip < iend - 15) { + U32 c = cached; + cached = ZSTD_read32(ip); + ip += 4; + Counting1[(BYTE)c]++; + Counting2[(BYTE)(c >> 8)]++; + Counting3[(BYTE)(c >> 16)]++; + Counting4[c >> 24]++; + c = cached; + cached = ZSTD_read32(ip); + ip += 4; + Counting1[(BYTE)c]++; + Counting2[(BYTE)(c >> 8)]++; + Counting3[(BYTE)(c >> 16)]++; + Counting4[c >> 24]++; + c = cached; + cached = ZSTD_read32(ip); + ip += 4; + Counting1[(BYTE)c]++; + Counting2[(BYTE)(c >> 8)]++; + Counting3[(BYTE)(c >> 16)]++; + Counting4[c >> 24]++; + c = cached; + cached = ZSTD_read32(ip); + ip += 4; + Counting1[(BYTE)c]++; + Counting2[(BYTE)(c >> 8)]++; + Counting3[(BYTE)(c >> 16)]++; + Counting4[c >> 24]++; + } + ip -= 4; + } + + /* finish last symbols */ + while (ip < iend) + Counting1[*ip++]++; + + if (checkMax) { /* verify stats will fit into destination table */ + U32 s; + for (s = 255; s > maxSymbolValue; s--) { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s]) + return ERROR(maxSymbolValue_tooSmall); + } + } + + { + U32 s; + for (s = 0; s <= maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) + max = count[s]; + } + } + + while (!count[maxSymbolValue]) + maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + return (size_t)max; +} + +/* FSE_countFast_wksp() : + * Same as FSE_countFast(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= `1024` unsigned */ +size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) +{ + if (sourceSize < 1500) + return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); + return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); +} + +/* FSE_count_wksp() : + * Same as FSE_count(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= `1024` unsigned */ +size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) +{ + if (*maxSymbolValuePtr < 255) + return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); + *maxSymbolValuePtr = 255; + return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); +} + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ +/*! FSE_sizeof_CTable() : + FSE_CTable is a variable size structure which contains : + `U16 tableLog;` + `U16 maxSymbolValue;` + `U16 nextStateNumber[1 << tableLog];` // This size is variable + `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable +Allocation is manual (C standard does not support variable-size structures). +*/ +size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) + return ERROR(tableLog_tooLarge); + return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32); +} + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + if (tableLog == 0) + tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) + tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) + tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) + tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) + tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + +/* Secondary normalization method. + To be used when primary method fails. */ + +static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s = 0; s <= maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s] = 0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = -1; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s] = NOT_YET_ASSIGNED; + } + ToDistribute = (1 << tableLog) - distributed; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s = 0; s <= maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue + 1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s = 0; s <= maxSymbolValue; s++) + if (count[s] > maxC) + maxV = s, maxC = count[s]; + norm[maxV] += (short)ToDistribute; + return 0; + } + + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1)) + if (norm[s] > 0) + ToDistribute--, norm[s]++; + return 0; + } + + { + U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog - 1)) - 1; + U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */ + U64 tmpTotal = mid; + for (s = 0; s <= maxSymbolValue; s++) { + if (norm[s] == NOT_YET_ASSIGNED) { + U64 const end = tmpTotal + (count[s] * rStep); + U32 const sStart = (U32)(tmpTotal >> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } + } + } + + return 0; +} + +size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue) +{ + /* Sanity checks */ + if (tableLog == 0) + tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) + return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) + return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) + return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { + U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}; + U64 const scale = 62 - tableLog; + U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */ + U64 const vStep = 1ULL << (scale - 20); + int stillToDistribute = 1 << tableLog; + unsigned s; + unsigned largest = 0; + short largestP = 0; + U32 lowThreshold = (U32)(total >> tableLog); + + for (s = 0; s <= maxSymbolValue; s++) { + if (count[s] == total) + return 0; /* rle special case */ + if (count[s] == 0) { + normalizedCounter[s] = 0; + continue; + } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = -1; + stillToDistribute--; + } else { + short proba = (short)((count[s] * step) >> scale); + if (proba < 8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s] * step) - ((U64)proba << scale) > restToBeat; + } + if (proba > largestP) + largestP = proba, largest = s; + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } + } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); + if (FSE_isError(errorCode)) + return errorCode; + } else + normalizedCounter[largest] += (short)stillToDistribute; + } + + return tableLog; +} + +/* fake FSE_CTable, for raw (uncompressed) input */ +size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits) +{ + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSymbolValue = tableMask; + void *const ptr = ct; + U16 *const tableU16 = ((U16 *)ptr) + 2; + void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) + return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16)nbBits; + tableU16[-1] = (U16)maxSymbolValue; + + /* Build table */ + for (s = 0; s < tableSize; s++) + tableU16[s] = (U16)(tableSize + s); + + /* Build Symbol Transformation Table */ + { + const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); + for (s = 0; s <= maxSymbolValue; s++) { + symbolTT[s].deltaNbBits = deltaNbBits; + symbolTT[s].deltaFindState = s - 1; + } + } + + return 0; +} + +/* fake FSE_CTable, for rle input (always same symbol) */ +size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue) +{ + void *ptr = ct; + U16 *tableU16 = ((U16 *)ptr) + 2; + void *FSCTptr = (U32 *)ptr + 2; + FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr; + + /* header */ + tableU16[-2] = (U16)0; + tableU16[-1] = (U16)symbolValue; + + /* Build table */ + tableU16[0] = 0; + tableU16[1] = 0; /* just in case */ + + /* Build Symbol Transformation Table */ + symbolTT[symbolValue].deltaNbBits = 0; + symbolTT[symbolValue].deltaFindState = 0; + + return 0; +} + +static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast) +{ + const BYTE *const istart = (const BYTE *)src; + const BYTE *const iend = istart + srcSize; + const BYTE *ip = iend; + + BIT_CStream_t bitC; + FSE_CState_t CState1, CState2; + + /* init */ + if (srcSize <= 2) + return 0; + { + size_t const initError = BIT_initCStream(&bitC, dst, dstSize); + if (FSE_isError(initError)) + return 0; /* not enough space available to write a bitstream */ + } + +#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) + + if (srcSize & 1) { + FSE_initCState2(&CState1, ct, *--ip); + FSE_initCState2(&CState2, ct, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } else { + FSE_initCState2(&CState2, ct, *--ip); + FSE_initCState2(&CState1, ct, *--ip); + } + + /* join to mod 4 */ + srcSize -= 2; + if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + while (ip > istart) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } |