diff options
Diffstat (limited to 'src/xxHash')
-rw-r--r-- | src/xxHash/.gitattributes | 10 | ||||
-rw-r--r-- | src/xxHash/.gitignore | 4 | ||||
-rw-r--r-- | src/xxHash/.travis.yml | 11 | ||||
-rw-r--r-- | src/xxHash/LICENSE | 24 | ||||
-rw-r--r-- | src/xxHash/Makefile | 143 | ||||
-rw-r--r-- | src/xxHash/README.md | 75 | ||||
-rw-r--r-- | src/xxHash/cmake_unofficial/CMakeLists.txt | 24 | ||||
-rw-r--r-- | src/xxHash/xxhash.c | 1012 | ||||
-rw-r--r-- | src/xxHash/xxhash.h | 255 | ||||
-rw-r--r-- | src/xxHash/xxhsum.1 | 115 | ||||
-rw-r--r-- | src/xxHash/xxhsum.1.md | 119 | ||||
-rw-r--r-- | src/xxHash/xxhsum.c | 1391 |
12 files changed, 3183 insertions, 0 deletions
diff --git a/src/xxHash/.gitattributes b/src/xxHash/.gitattributes new file mode 100644 index 00000000..fbcf75b5 --- /dev/null +++ b/src/xxHash/.gitattributes @@ -0,0 +1,10 @@ +# Set the default behavior +* text eol=lf + +# Explicitly declare source files +*.c text eol=lf +*.h text eol=lf + +# Denote files that should not be modified. +*.odt binary + diff --git a/src/xxHash/.gitignore b/src/xxHash/.gitignore new file mode 100644 index 00000000..dda93c99 --- /dev/null +++ b/src/xxHash/.gitignore @@ -0,0 +1,4 @@ +/.deps +/.dirstamp +/.libs +/xxhash.lo diff --git a/src/xxHash/.travis.yml b/src/xxHash/.travis.yml new file mode 100644 index 00000000..4adeb390 --- /dev/null +++ b/src/xxHash/.travis.yml @@ -0,0 +1,11 @@ +language: c +compiler: gcc +script: make -B test-all +before_install: + - sudo apt-get update -qq + - sudo apt-get install -qq gcc-arm-linux-gnueabi + - sudo apt-get install -qq clang + - sudo apt-get install -qq g++-multilib + - sudo apt-get install -qq gcc-multilib + - sudo apt-get install -qq valgrind + diff --git a/src/xxHash/LICENSE b/src/xxHash/LICENSE new file mode 100644 index 00000000..7de801ed --- /dev/null +++ b/src/xxHash/LICENSE @@ -0,0 +1,24 @@ +xxHash Library +Copyright (c) 2012-2014, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/xxHash/Makefile b/src/xxHash/Makefile new file mode 100644 index 00000000..36bc20d0 --- /dev/null +++ b/src/xxHash/Makefile @@ -0,0 +1,143 @@ +# ################################################################ +# xxHash Makefile +# Copyright (C) Yann Collet 2012-2015 +# +# GPL v2 License +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# You can contact the author at : +# - xxHash source repository : http://code.google.com/p/xxhash/ +# ################################################################ +# xxhsum : provides 32/64 bits hash of one or multiple files, or stdin +# ################################################################ + +# Version numbers +LIBVER_MAJOR:=`sed -n '/define XXH_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER_MINOR:=`sed -n '/define XXH_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER_PATCH:=`sed -n '/define XXH_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH) + +CFLAGS ?= -O3 +CFLAGS += -std=c99 -Wall -Wextra -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes -Wstrict-aliasing=1 -Wswitch-enum -Wundef -pedantic +FLAGS := $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MOREFLAGS) +XXHSUM_VERSION=$(LIBVER) +MD2ROFF =ronn +MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="xxhsum $(XXHSUM_VERSION)" + +# Define *.exe as extension for Windows systems +ifneq (,$(filter Windows%,$(OS))) +EXT =.exe +else +EXT = +endif + +.PHONY: clean all + +default: xxhsum + +all: xxhsum xxhsum32 xxhsum_privateXXH + +xxhsum: xxhash.c xxhsum.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + ln -sf $@ xxh32sum + ln -sf $@ xxh64sum + +xxhsum32: xxhash.c xxhsum.c + $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) + +xxhsum_privateXXH: xxhsum.c + $(CC) $(FLAGS) -DXXHSUM_INCLUDE_XXHC $^ -o $@$(EXT) + +test: clean xxhsum + # stdin + ./xxhsum < xxhash.c + # multiple files + ./xxhsum * + # internal bench + ./xxhsum -bi1 + # file bench + ./xxhsum -bi1 xxhash.c + # memory tests + valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -bi1 xxhash.c + valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H0 xxhash.c + valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H1 xxhash.c + +test32: clean xxhsum32 + @echo ---- test 32-bits ---- + ./xxhsum32 -bi1 xxhash.c + +test-xxhsum-c: 
xxhsum + # xxhsum to/from pipe + ./xxhsum * | ./xxhsum -c - + ./xxhsum -H0 * | ./xxhsum -c - + # xxhsum to/from file, shell redirection + ./xxhsum * > .test.xxh64 + ./xxhsum -H0 * > .test.xxh32 + ./xxhsum -c .test.xxh64 + ./xxhsum -c .test.xxh32 + ./xxhsum -c < .test.xxh64 + ./xxhsum -c < .test.xxh32 + # xxhsum -c warns improperly format lines. + cat .test.xxh64 .test.xxh32 | ./xxhsum -c - + cat .test.xxh32 .test.xxh64 | ./xxhsum -c - + # Expects "FAILED" + echo "0000000000000000 LICENSE" | ./xxhsum -c -; test $$? -eq 1 + echo "00000000 LICENSE" | ./xxhsum -c -; test $$? -eq 1 + # Expects "FAILED open or read" + echo "0000000000000000 test-expects-file-not-found" | ./xxhsum -c -; test $$? -eq 1 + echo "00000000 test-expects-file-not-found" | ./xxhsum -c -; test $$? -eq 1 + +clean-xxhsum-c: + @rm -f .test.xxh32 .test.xxh64 + +armtest: clean + @echo ---- test ARM compilation ---- + $(MAKE) xxhsum CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror" + +clangtest: clean + @echo ---- test clang compilation ---- + $(MAKE) all CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion" + +gpptest: clean + @echo ---- test g++ compilation ---- + $(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror" + +sanitize: clean + @echo ---- check undefined behavior - sanitize ---- + $(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=undefined" + +staticAnalyze: clean + @echo ---- static analyzer - scan-build ---- + CFLAGS="-g -Werror" scan-build --status-bugs -v $(MAKE) all + +xxhsum.1: xxhsum.1.md + cat $^ | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@ + +man: xxhsum.1 + +clean-man: + rm xxhsum.1 + +preview-man: clean-man man + man ./xxhsum.1 + +test-all: clean all test test32 test-xxhsum-c clean-xxhsum-c armtest clangtest gpptest sanitize staticAnalyze + +clean: clean-xxhsum-c + @rm -f core *.o xxhsum$(EXT) xxhsum32$(EXT) xxhsum_privateXXH$(EXT) xxh32sum xxh64sum + @echo cleaning completed + + diff --git a/src/xxHash/README.md b/src/xxHash/README.md 
new file mode 100644 index 00000000..e2ec2b8e --- /dev/null +++ b/src/xxHash/README.md @@ -0,0 +1,75 @@ +xxHash - Extremely fast hash algorithm +====================================== + +xxHash is an Extremely fast Hash algorithm, running at RAM speed limits. +It successfully completes the [SMHasher](http://code.google.com/p/smhasher/wiki/SMHasher) test suite +which evaluates collision, dispersion and randomness qualities of hash functions. + +|Branch |Status | +|------------|---------| +|master | [![Build Status](https://travis-ci.org/Cyan4973/xxHash.svg?branch=master)](https://travis-ci.org/Cyan4973/xxHash?branch=master) | +|dev | [![Build Status](https://travis-ci.org/Cyan4973/xxHash.svg?branch=dev)](https://travis-ci.org/Cyan4973/xxHash?branch=dev) | + +> **Branch Policy:** + +> - The "master" branch is considered stable, at all times. +> - The "dev" branch is the one where all contributions must be merged + before being promoted to master. +> + If you plan to propose a patch, please commit into the "dev" branch, + or its own feature branch. + Direct commits to "master" are not permitted. + + +Benchmarks +------------------------- + +The benchmark uses the SMHasher speed test, compiled with Visual 2010 on a Windows Seven 32-bits box. +The reference system uses a Core 2 Duo @3GHz + + +| Name | Speed | Quality | Author | +|---------------|----------|:-------:|------------------| +| [xxHash] | 5.4 GB/s | 10 | Y.C. | +| MurmurHash 3a | 2.7 GB/s | 10 | Austin Appleby | +| SBox | 1.4 GB/s | 9 | Bret Mulvey | +| Lookup3 | 1.2 GB/s | 9 | Bob Jenkins | +| CityHash64 | 1.05 GB/s| 10 | Pike & Alakuijala| +| FNV | 0.55 GB/s| 5 | Fowler, Noll, Vo | +| CRC32 | 0.43 GB/s| 9 | | +| MD5-32 | 0.33 GB/s| 10 | Ronald L.Rivest | +| SHA1-32 | 0.28 GB/s| 10 | | + +[xxHash]: http://www.xxhash.com + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing the SMHasher test set. +10 is a perfect score. +Algorithms with a score < 5 are not listed on this table. 
+ +A new version, XXH64, has been created thanks to [Mathias Westerdahl]'s contribution, +which offers superior speed and dispersion for 64-bits systems. +Note however that 32-bits applications will still run faster using the 32-bits version. +[Mathias Westerdahl]: https://github.com/JCash + +SMHasher speed test, compiled using GCC 4.8.2, on Linux Mint 64-bits. +The reference system uses a Core i5-3340M @2.7GHz + +| Version | Speed on 64-bits | Speed on 32-bits | +|------------|------------------|------------------| +| XXH64 | 13.8 GB/s | 1.9 GB/s | +| XXH32 | 6.8 GB/s | 6.0 GB/s | + + +### License + +The library files `xxhash.c` and `xxhash.h` are BSD licensed. +The utility `xxhsum` is GPL licensed. + + +### Other languages + +Beyond the C reference version, +xxHash is also available on many programming languages, +thanks to great contributors. +They are [listed here](http://www.xxhash.com/#other-languages). + diff --git a/src/xxHash/cmake_unofficial/CMakeLists.txt b/src/xxHash/cmake_unofficial/CMakeLists.txt new file mode 100644 index 00000000..825193e1 --- /dev/null +++ b/src/xxHash/cmake_unofficial/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 2.6) +cmake_policy(VERSION 2.6) + +project(xxhash) + +set(XXHASH_LIB_VERSION "0.42.0") +set(XXHASH_LIB_SOVERSION "0") + +add_library(xxhash SHARED ../xxhash.c) +set_target_properties(xxhash PROPERTIES COMPILE_DEFINITIONS "XXHASH_EXPORT" + VERSION "${XXHASH_LIB_VERSION}" + SOVERSION "${XXHASH_LIB_SOVERSION}") +set(install_libs xxhash) + +set(BUILD_STATIC_LIBS ON CACHE BOOL "Set to ON to build static libraries") +if(BUILD_STATIC_LIBS) + add_library(xxhashstatic ../xxhash.c) + set_target_properties(xxhashstatic PROPERTIES OUTPUT_NAME xxhash) + LIST(APPEND install_libs xxhashstatic) +endif(BUILD_STATIC_LIBS) + + +INSTALL(FILES ../xxhash.h DESTINATION include) +INSTALL(TARGETS ${install_libs} DESTINATION lib) diff --git a/src/xxHash/xxhash.c b/src/xxHash/xxhash.c new file mode 100644 index 00000000..35f53155 
--- /dev/null +++ b/src/xxHash/xxhash.c @@ -0,0 +1,1012 @@ +/* +xxHash - Fast Hash algorithm +Copyright (C) 2012-2016, Yann Collet + +BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. 
+ * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violates the C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. 
+ * Should endian-independance be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#define XXH_FORCE_NATIVE_FORMAT 0 + +/*!XXH_USELESS_ALIGN_BRANCH : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : don't check for aligned/unaligned input, because performance will be the same. + * It saves one initial branch per hash. + */ +#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_USELESS_ALIGN_BRANCH 1 +#endif + + +/* ************************************* +* Compiler Specific Options +***************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# define FORCE_INLINE static __forceinline +#else +# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for malloc(), free() */ +#include <stdlib.h> +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +/* for memcpy() */ +#include <string.h> +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + +#include "xxhash.h" + + +/* ************************************* +* Basic Types +***************************************/ +#ifndef MEM_MODULE +# define MEM_MODULE +# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# include <stdint.h> + typedef 
uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +# else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +# endif +#endif + + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) 
+#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +#define PRIME32_1 2654435761U +#define PRIME32_2 2246822519U +#define PRIME32_3 3266489917U +#define PRIME32_4 668265263U +#define PRIME32_5 374761393U + +#define PRIME64_1 11400714785074694791ULL +#define PRIME64_2 14029467366897019727ULL +#define PRIME64_3 1609587929392839161ULL +#define PRIME64_4 9650029242287828579ULL +#define PRIME64_5 2870177450012600261ULL + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* *************************** +* Simple Hash Functions +*****************************/ +FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do + { + v1 += XXH_get32bits(p) * PRIME32_2; + v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_get32bits(p) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_get32bits(p) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_get32bits(p) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } + else + { + h32 = seed + 
PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) + { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p<bEnd) + { + h32 += (*p) * PRIME32_5; + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USELESS_ALIGN_BRANCH) + if ((((size_t)input) & 3) == 0) /* Input is 4-bytes aligned, leverage the speed benefit */ + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do + { + v1 += 
XXH_get64bits(p) * PRIME64_2; + p+=8; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + } + while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64 * PRIME64_1 + PRIME64_4; + } + else + { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) + { + U64 k1 = XXH_get64bits(p); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) + { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) + { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if 
!defined(XXH_USELESS_ALIGN_BRANCH) + if ((((size_t)input) & 7)==0) /* Input is aligned, let's leverage the speed advantage */ + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +/*** Allocation ***/ +struct XXH32_state_s +{ + U64 total_len; + U32 seed; + U32 v1; + U32 v2; + U32 v3; + U32 v4; + U32 mem32[4]; /* defined as U32 for alignment */ + U32 memsize; +}; /* typedef'd to XXH32_state_t within xxhash.h */ + +struct XXH64_state_s +{ + U64 total_len; + U64 seed; + U64 v1; + U64 v2; + U64 v3; + U64 v4; + U64 mem64[4]; /* defined as U64 for alignment */ + U32 memsize; +}; /* typedef'd to XXH64_state_t within xxhash.h */ + + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_stateBody_t) >= sizeof(XXH32_state_t)); /* A compilation error here means XXH32_state_t is not large enough */ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_stateBody_t) >= sizeof(XXH64_state_t)); /* A compilation error here means XXH64_state_t is not large enough */ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed 
***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.seed = seed; + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.seed = seed; + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 16) /* fill in tmp buffer */ + { + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) /* some data left from previous update */ + { + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { + const U32* p32 = state->mem32; + state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v1 = XXH_rotl32(state->v1, 13); + state->v1 *= PRIME32_1; + p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v2 = XXH_rotl32(state->v2, 13); + state->v2 *= PRIME32_1; + p32++; + state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v3 = XXH_rotl32(state->v3, 13); + state->v3 *= PRIME32_1; + p32++; + state->v4 += 
XXH_readLE32(p32, endian) * PRIME32_2; + state->v4 = XXH_rotl32(state->v4, 13); + state->v4 *= PRIME32_1; + p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do + { + v1 += XXH_readLE32(p, endian) * PRIME32_2; + v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_readLE32(p, endian) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_readLE32(p, endian) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_readLE32(p, endian) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->mem32, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; + + if (state->total_len >= 16) + { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } + else + { + h32 = state->seed + PRIME32_5; + } + + h32 += (U32) state->total_len; + + while (p+4<=bEnd) + { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p<bEnd) + { + h32 += (*p) * PRIME32_5; + h32 = 
XXH_rotl32(h32, 11) * PRIME32_1; + p++; + } + + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + + +/* **** XXH64 **** */ + +FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) /* fill in tmp buffer */ + { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) /* some data left from previous update */ + { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + { + const U64* p64 = state->mem64; + state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v1 = XXH_rotl64(state->v1, 31); + state->v1 *= PRIME64_1; + p64++; + state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v2 = XXH_rotl64(state->v2, 31); + state->v2 *= PRIME64_1; + p64++; + state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v3 = XXH_rotl64(state->v3, 31); + state->v3 *= PRIME64_1; + p64++; + state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v4 = XXH_rotl64(state->v4, 31); + state->v4 *= PRIME64_1; + p64++; + } + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do + { + v1 += XXH_readLE64(p, endian) 
* PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + p+=8; + v2 += XXH_readLE64(p, endian) * PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + p+=8; + v3 += XXH_readLE64(p, endian) * PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + p+=8; + v4 += XXH_readLE64(p, endian) * PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + p+=8; + } + while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->mem64, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; + + if (state->total_len >= 32) + { + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + h64 = h64*PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + h64 = h64*PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64*PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64*PRIME64_1 + PRIME64_4; + } + else + { + h64 = state->seed + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) + { + U64 k1 
= XXH_readLE64(p, endian); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) + { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p<bEnd) + { + h64 ^= (*p) * PRIME64_5; + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + p++; + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/* ************************** +* Canonical representation +****************************/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. 
+*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} + diff --git a/src/xxHash/xxhash.h b/src/xxHash/xxhash.h new file mode 100644 index 00000000..ed7ae141 --- /dev/null +++ b/src/xxHash/xxhash.h @@ -0,0 +1,255 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MurmurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. 
+Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************** +* Definitions +******************************/ +#include <stddef.h> /* size_t */ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/*!XXH_PRIVATE_API +* Transforms all public symbols within `xxhash.c` into private ones. +* Methodology : +* instead of : #include "xxhash.h" +* do : +* #define XXH_PRIVATE_API +* #include "xxhash.c" // note the .c , instead of .h +* also : don't compile and link xxhash.c separately +*/ +#ifdef XXH_PRIVATE_API +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from `xxhash.c` +with the value of XXH_NAMESPACE (so avoid leaving it NULL and avoid numeric values). + +Note that no change is required within the calling program as long as it also includes `xxhash.h` : +regular symbol name will be automatically translated by this header. 
+*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 5 +#define XXH_VERSION_RELEASE 1 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. 
+ Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + + +/*! Static allocation + For static linking only, do not use in the context of DLL ! + XXHnn_CREATESTATE_STATIC(name); + is static-allocation equivalent of : + XXHnn_state_t* name = XXHnn_createState(); +*/ +typedef struct { long long ll[ 6]; } XXH32_stateBody_t; +typedef struct { long long ll[11]; } XXH64_stateBody_t; + +#define XXH32_CREATESTATE_STATIC(name) XXH32_stateBody_t name##xxhbody; void* name##xxhvoid = &(name##xxhbody); XXH32_state_t* name = (XXH32_state_t*)(name##xxhvoid) /* no final ; */ +#define XXH64_CREATESTATE_STATIC(name) XXH64_stateBody_t name##xxhbody; void* name##xxhvoid = &(name##xxhbody); XXH64_state_t* name = (XXH64_state_t*)(name##xxhvoid) /* no final ; */ + + +/*!Dynamic allocation + To be preferred in the context of DLL */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API 
XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/*! +These functions generate the xxHash of an input provided in multiple segments, +as opposed to provided as a single block. + +XXH state must first be allocated, using either static or dynamic method provided above. + +Start a new hash by initializing state with a seed, using XXHnn_reset(). + +Then, feed the hash state by calling XXHnn_update() as many times as necessary. +Obviously, input must be valid, hence allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXHnn_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and later on generate some new hashes, by calling again XXHnn_digest(). + +When done, free XXH state space if it was allocated dynamically. +*/ + + +/* ************************** +* Canonical representation +****************************/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +/*! Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. 
+* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. +*/ + + +#if defined (__cplusplus) +} +#endif + +#endif /* XXHASH_H_5627135585666179 */ diff --git a/src/xxHash/xxhsum.1 b/src/xxHash/xxhsum.1 new file mode 100644 index 00000000..7d1027d3 --- /dev/null +++ b/src/xxHash/xxhsum.1 @@ -0,0 +1,115 @@ +. +.TH "XXHSUM" "1" "February 2016" "xxhsum 0.5.0" "User Commands" +. +.SH "NAME" +\fBxxhsum\fR \- print or check xxHash non\-cryptographic checksums +. +.SH "SYNOPSIS" +\fBxxhsum\fR [\fIOPTION\fR] \.\.\. [\fIFILE\fR] \.\.\. +. +.SH "DESCRIPTION" +Print or check xxHash (32 or 64bit) checksums\. When \fIFILE\fR is \fB\-\fR, read standard input\. +. +.P +\fBxxhsum\fR supports a command line syntax similar but not identical to md5sum(1)\. Differences are: \fBxxhsum\fR doesn\'t have text/binary mode switch (\fB\-b\fR, \fB\-t\fR); \fBxxhsum\fR always treats file as binary file; \fBxxhsum\fR has hash bit width switch (\fB\-H\fR); +. +.P +Since xxHash is a non\-cryptographic checksum algorithm, \fBxxhsum\fR should not be used for security related purposes\. +. +.SH "OPTIONS" +. +.TP +\fB\-b\fR +Benchmark mode +. +.TP +\fB\-B\fR\fIBLOCKSIZE\fR +\fIBLOCKSIZE\fR specifies benchmark mode\'s test data block size in bytes\. Default value is 102400 +. +.TP +\fB\-c\fR, \fB\-\-check\fR +Read xxHash sums from the \fIFILE\fRs and check them +. +.TP +\fB\-h\fR, \fB\-\-help\fR +Display help and exit +. +.TP +\fB\-H\fR\fIHASHTYPE\fR +Hash selection\. \fIHASHTYPE\fR means \fB0\fR=32bits, \fB1\fR=64bits\. Default value is \fB1\fR (64bits) +. +.TP +\fB\-\-little\-endian\fR +Set output hexadecimal checksum value as little endian convention\. By default, value is displayed as big endian +. +.TP +\fB\-V\fR, \fB\-\-version\fR +Display xxhsum version +. +.P +\fBThe following four options are useful only when verifying checksums (\fB\-c\fR)\fR +. +.TP +\fB\-\-quiet\fR +Exit non\-zero for improperly formatted checksum lines +. 
+.TP +\fB\-\-strict\fR +Don\'t print OK for each successfully verified file +. +.TP +\fB\-\-status\fR +Don\'t output anything, status code shows success +. +.TP +\fB\-w\fR, \fB\-\-warn\fR +Warn about improperly formatted checksum lines +. +.SH "EXIT STATUS" +\fBxxhsum\fR exits \fB0\fR on success, \fB1\fR if at least one file couldn\'t be read or doesn\'t have the same checksum as the \fB\-c\fR option\. +. +.SH "EXAMPLES" +Output xxHash (64bit) checksum values of specific files to standard output +. +.IP "" 4 +. +.nf + +$ xxhsum \-H1 foo bar baz +. +.fi +. +.IP "" 0 +. +.P +Output xxHash (32bit and 64bit) checksum values of specific files to standard output, and redirect it to \fBxyz\.xxh32\fR and \fBqux\.xxh64\fR +. +.IP "" 4 +. +.nf + +$ xxhsum \-H0 foo bar baz > xyz\.xxh32 +$ xxhsum \-H1 foo bar baz > qux\.xxh64 +. +.fi +. +.IP "" 0 +. +.P +Read xxHash sums from specific files and check them +. +.IP "" 4 +. +.nf + +$ xxhsum \-c xyz\.xxh32 qux\.xxh64 +. +.fi +. +.IP "" 0 +. +.SH "BUGS" +Report bugs at: https://github\.com/Cyan4973/xxHash/issues/ +. +.SH "AUTHOR" +Yann Collet diff --git a/src/xxHash/xxhsum.1.md b/src/xxHash/xxhsum.1.md new file mode 100644 index 00000000..426ceef2 --- /dev/null +++ b/src/xxHash/xxhsum.1.md @@ -0,0 +1,119 @@ +xxhsum(1) -- print or check xxHash non-cryptographic checksums +============================================================== + +SYNOPSIS +-------- + +`xxhsum` [<OPTION>] ... [<FILE>] ...<br/> +`xxhsum -b` [<OPTION>] ... + +DESCRIPTION +----------- + +Print or check xxHash (32 or 64bit) checksums. When <FILE> is `-`, read +standard input. + +`xxhsum` supports a command line syntax similar but not identical to +md5sum(1). Differences are: `xxhsum` doesn't have text/binary mode switch +(`-b`, `-t`); `xxhsum` always treats file as binary file; `xxhsum` has hash +bit width switch (`-H`); + +Since xxHash is a non-cryptographic checksum algorithm, `xxhsum` should not be +used for security related purposes. 
+ +`xxhsum -b` invokes benchmark mode. See [OPTIONS](#OPTIONS) and [EXAMPLES](#EXAMPLES) for details. + +OPTIONS +------- + +* `-c`, `--check`: + Read xxHash sums from the <FILE>s and check them + +* `-h`, `--help`: + Display help and exit + +* `-H`<HASHTYPE>: + Hash selection. <HASHTYPE> means `0`=32bits, `1`=64bits. + Default value is `1` (64bits) + +* `--little-endian`: + Set output hexadecimal checksum value as little endian convention. + By default, value is displayed as big endian + +* `-V`, `--version`: + Display xxhsum version + +**The following four options are useful only when verifying checksums (`-c`)** + +* `--quiet`: + Exit non-zero for improperly formatted checksum lines + +* `--strict`: + Don't print OK for each successfully verified file + +* `--status`: + Don't output anything, status code shows success + +* `-w`, `--warn`: + Warn about improperly formatted checksum lines + +**The following options are useful only for benchmark purposes** + +* `-b`: + Benchmark mode. See [EXAMPLES](#EXAMPLES) for details. + +* `-B`<BLOCKSIZE>: + Only useful for benchmark mode (`-b`). See [EXAMPLES](#EXAMPLES) for details. + <BLOCKSIZE> specifies benchmark mode's test data block size in bytes. + Default value is 102400 + +* `-i`<ITERATIONS>: + Only useful for benchmark mode (`-b`). See [EXAMPLES](#EXAMPLES) for details. + <ITERATIONS> specifies number of iterations in benchmark. Single iteration + takes at least 2500 milliseconds. Default value is 3 + +EXIT STATUS +----------- + +`xxhsum` exits `0` on success, `1` if at least one file couldn't be read or +doesn't have the same checksum as the `-c` option. 
+ +EXAMPLES +-------- + +Output xxHash (64bit) checksum values of specific files to standard output + + $ xxhsum -H1 foo bar baz + +Output xxHash (32bit and 64bit) checksum values of specific files to standard +output, and redirect it to `xyz.xxh32` and `qux.xxh64` + + $ xxhsum -H0 foo bar baz > xyz.xxh32 + $ xxhsum -H1 foo bar baz > qux.xxh64 + +Read xxHash sums from specific files and check them + + $ xxhsum -c xyz.xxh32 qux.xxh64 + +Benchmark the xxHash algorithm on 16384 bytes of data, 10 times. `xxhsum` +benchmarks the xxHash algorithm for 32-bit and 64-bit and outputs results to +standard output. First column means algorithm, second column is source data +size in bytes, last column means hash generation speed in megabytes per +second. + + $ xxhsum -b -i10 -B16384 + +BUGS +---- + +Report bugs at: https://github.com/Cyan4973/xxHash/issues/ + +AUTHOR +------ + +Yann Collet + +SEE ALSO +-------- + +md5sum(1) diff --git a/src/xxHash/xxhsum.c b/src/xxHash/xxhsum.c new file mode 100644 index 00000000..8667596b --- /dev/null +++ b/src/xxHash/xxhsum.c @@ -0,0 +1,1391 @@ +/* +xxhsum - Command line interface for xxhash algorithms +Copyright (C) Yann Collet 2012-2016 + +GPL v2 License + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +You can contact the author at : +- xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/*! 
xxhsum + * Provides hash value of a file content, or a list of files, or stdin + * Display convention is Big Endian, for both 32 and 64 bits algorithms + */ + + +/* ************************************ +* Compiler Options +**************************************/ +/* MS Visual */ +#if defined(_MSC_VER) || defined(_WIN32) +# define _CRT_SECURE_NO_WARNINGS /* removes visual warnings */ +# define BMK_LEGACY_TIMER 1 /* gettimeofday() not supported by MSVC */ +#endif + +/* Under Linux at least, pull in the *64 commands */ +#ifndef _LARGEFILE64_SOURCE +# define _LARGEFILE64_SOURCE +#endif + + +/* ************************************ +* Includes +**************************************/ +#include <stdlib.h> /* malloc */ +#include <stdio.h> /* fprintf, fopen, ftello64, fread, stdin, stdout; when present : _fileno */ +#include <string.h> /* strcmp */ +#include <sys/types.h> /* stat64 */ +#include <sys/stat.h> /* stat64 */ + +#if defined(XXHSUM_INCLUDE_XXHC) /* for tests */ +# define XXH_PRIVATE_API +# include "xxhash.c" +#else +# include "xxhash.h" +#endif + + +/* ************************************ +* OS-Specific Includes +**************************************/ +/*!Use ftime() if gettimeofday() is not available on your target */ +#if defined(BMK_LEGACY_TIMER) +# include <sys/timeb.h> /* timeb, ftime */ +#else +# include <sys/time.h> /* gettimeofday */ +#endif + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) +# include <fcntl.h> /* _O_BINARY */ +# include <io.h> /* _setmode, _isatty */ +# ifdef __MINGW32__ + int _fileno(FILE *stream); /* MINGW somehow forgets to include this windows declaration into <stdio.h> */ +# endif +# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY) +# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) +#else +# include <unistd.h> /* isatty, STDIN_FILENO */ +# define SET_BINARY_MODE(file) +# define IS_CONSOLE(stdStream) isatty(STDIN_FILENO) +#endif + +#if !defined(S_ISREG) +# define 
S_ISREG(x) (((x) & S_IFMT) == S_IFREG) +#endif + + +/* ************************************ +* Basic Types +**************************************/ +#ifndef MEM_MODULE +# define MEM_MODULE +# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# include <stdint.h> + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +# else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +# endif +#endif + +static unsigned BMK_isLittleEndian(void) +{ + const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + + +/* ************************************* +* Constants +***************************************/ +#define PROGRAM_NAME exename +#define LIB_VERSION XXH_VERSION_MAJOR.XXH_VERSION_MINOR.XXH_VERSION_RELEASE +#define QUOTE(str) #str +#define EXPAND_AND_QUOTE(str) QUOTE(str) +#define PROGRAM_VERSION EXPAND_AND_QUOTE(LIB_VERSION) +static const int g_nbBits = (int)(sizeof(void*)*8); +static const char g_lename[] = "little endian"; +static const char g_bename[] = "big endian"; +#define ENDIAN_NAME (BMK_isLittleEndian() ? 
g_lename : g_bename) +#define COMPILED __DATE__ +static const char author[] = "Yann Collet"; +#define WELCOME_MESSAGE "%s %s (%i-bits %s), by %s (%s) \n", PROGRAM_NAME, PROGRAM_VERSION, g_nbBits, ENDIAN_NAME, author, COMPILED + +#define NBLOOPS 3 /* Default number of benchmark iterations */ +#define TIMELOOP 2500 /* Minimum timing per iteration */ +#define XXHSUM32_DEFAULT_SEED 0 /* Default seed for algo_xxh32 */ +#define XXHSUM64_DEFAULT_SEED 0 /* Default seed for algo_xxh64 */ + +#define KB *( 1<<10) +#define MB *( 1<<20) +#define GB *(1U<<30) + +#define MAX_MEM (2 GB - 64 MB) + +static const char stdinName[] = "-"; +typedef enum { algo_xxh32, algo_xxh64 } algoType; +static const algoType g_defaultAlgo = algo_xxh64; /* required within main() & usage() */ + +/* <16 hex char> <SPC> <SPC> <filename> <'\0'> + * '4096' is typical Linux PATH_MAX configuration. */ +#define DEFAULT_LINE_LENGTH (sizeof(XXH64_hash_t) * 2 + 2 + 4096 + 1) + +/* Maximum acceptable line length. */ +#define MAX_LINE_LENGTH (32 KB) + + +/* ************************************ +* Display macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYRESULT(...) fprintf(stdout, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); +static unsigned g_displayLevel = 1; + + +/* ************************************ +* Local variables +**************************************/ +static size_t g_sampleSize = 100 KB; +static int g_nbIterations = NBLOOPS; + + +/* ************************************ +* Benchmark Functions +**************************************/ +#if defined(BMK_LEGACY_TIMER) + +static int BMK_GetMilliStart(void) +{ + /* Based on Legacy ftime() + * Rolls over every ~ 12.1 days (0x100000/24/60/60) + * Use GetMilliSpan to correct for rollover */ + struct timeb tb; + int nCount; + ftime( &tb ); + nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000); + return nCount; +} + +#else + +static int BMK_GetMilliStart(void) +{ + /* Based on newer gettimeofday() + * Use GetMilliSpan to correct for rollover */ + struct timeval tv; + int nCount; + gettimeofday(&tv, NULL); + nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000); + return nCount; +} + +#endif + +static int BMK_GetMilliSpan( int nTimeStart ) +{ + int nSpan = BMK_GetMilliStart() - nTimeStart; + if ( nSpan < 0 ) + nSpan += 0x100000 * 1000; + return nSpan; +} + + +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t step = 64 MB; + BYTE* testmem=NULL; + + requiredMem = (((requiredMem >> 26) + 1) << 26); + requiredMem += 2*step; + if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; + + while (!testmem) + { + if (requiredMem > step) requiredMem -= step; + else requiredMem >>= 1; + testmem = (BYTE*) malloc ((size_t)requiredMem); + } + free (testmem); + + /* keep some space available */ + if (requiredMem > step) requiredMem -= step; + else requiredMem >>= 1; + + return (size_t)requiredMem; +} + + +static U64 BMK_GetFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); +#else + struct stat statbuf; + r = stat(infilename, &statbuf); +#endif + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... 
*/ + return (U64)statbuf.st_size; +} + +typedef void (*hashFunction)(const void* buffer, size_t bufferSize); + +static void localXXH32(const void* buffer, size_t bufferSize) { XXH32(buffer, bufferSize, 0); } + +static void localXXH64(const void* buffer, size_t bufferSize) { XXH64(buffer, bufferSize, 0); } + +static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer, size_t bufferSize) +{ + static const int nbh_perloop = 100; + int iterationNb; + double fastestH = 100000000.; + + DISPLAY("\r%79s\r", ""); /* Clean display line */ + if (g_nbIterations<1) g_nbIterations=1; + for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++) + { + int nbHashes = 0; + int milliTime; + + DISPLAY("%1i-%-17.17s : %10i ->\r", iterationNb, hName, (int)bufferSize); + + /* Timing loop */ + milliTime = BMK_GetMilliStart(); + while(BMK_GetMilliStart() == milliTime); + milliTime = BMK_GetMilliStart(); + while(BMK_GetMilliSpan(milliTime) < TIMELOOP) + { + int i; + for (i=0; i<nbh_perloop; i++) + { + h(buffer, bufferSize); + } + nbHashes += nbh_perloop; + } + milliTime = BMK_GetMilliSpan(milliTime); + if ((double)milliTime < fastestH*nbHashes) fastestH = (double)milliTime/nbHashes; + DISPLAY("%1i-%-17.17s : %10i -> %7.1f MB/s\r", iterationNb, hName, (int)bufferSize, (double)bufferSize / fastestH / 1000.); + } + DISPLAY("%-19.19s : %10i -> %7.1f MB/s \n", hName, (int)bufferSize, (double)bufferSize / fastestH / 1000.); +} + + +/* Note : buffer is supposed malloc'ed, hence aligned */ +static void BMK_benchMem(const void* buffer, size_t bufferSize) +{ + /* XXH32 bench */ + BMK_benchHash(localXXH32, "XXH32", buffer, bufferSize); + + /* Bench XXH32 on Unaligned input */ + if (bufferSize>1) + BMK_benchHash(localXXH32, "XXH32 unaligned", ((const char*)buffer)+1, bufferSize-1); + + /* Bench XXH64 */ + BMK_benchHash(localXXH64, "XXH64", buffer, bufferSize); + + /* Bench XXH64 on Unaligned input */ + if (bufferSize>1) + BMK_benchHash(localXXH64, "XXH64 unaligned", 
((const char*)buffer)+1, bufferSize-1); +} + + +static int BMK_benchFiles(const char** fileNamesTable, int nbFiles) +{ + int fileIdx=0; + + while (fileIdx<nbFiles) + { + FILE* inFile; + const char* inFileName; + U64 inFileSize; + size_t benchedSize; + size_t readSize; + char* buffer; + char* alignedBuffer; + + /* Check file existence */ + inFileName = fileNamesTable[fileIdx++]; + inFile = fopen( inFileName, "rb" ); + if ((inFile==NULL) || (inFileName==NULL)) + { + DISPLAY( "Pb opening %s\n", inFileName); + return 11; + } + + /* Memory allocation & restrictions */ + inFileSize = BMK_GetFileSize(inFileName); + benchedSize = (size_t) BMK_findMaxMem(inFileSize); + if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; + if (benchedSize < inFileSize) + { + DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20)); + } + + buffer = (char*)malloc((size_t )benchedSize+16); + if(!buffer) + { + DISPLAY("\nError: not enough memory!\n"); + fclose(inFile); + return 12; + } + alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF); /* align on next 16 bytes boundaries */ + + /* Fill input buffer */ + DISPLAY("\rLoading %s... \n", inFileName); + readSize = fread(alignedBuffer, 1, benchedSize, inFile); + fclose(inFile); + + if(readSize != benchedSize) + { + DISPLAY("\nError: problem reading file '%s' !! \n", inFileName); + free(buffer); + return 13; + } + + /* bench */ + BMK_benchMem(alignedBuffer, benchedSize); + + free(buffer); + } + + return 0; +} + + + +static int BMK_benchInternal(void) +{ + const size_t benchedSize = g_sampleSize; + void* buffer; + + buffer = malloc(benchedSize); + if(!buffer) + { + DISPLAY("\nError: not enough memory!\n"); + return 12; + } + + /* bench */ + DISPLAY("\rSample of %u KB... 
\n", (U32)(benchedSize >> 10)); + BMK_benchMem(buffer, benchedSize); + + free(buffer); + + return 0; +} + + +static void BMK_checkResult(U32 r1, U32 r2) +{ + static int nbTests = 1; + + if (r1==r2) DISPLAY("\rTest%3i : %08X == %08X ok ", nbTests, r1, r2); + else + { + DISPLAY("\rERROR : Test%3i : %08X <> %08X !!!!! \n", nbTests, r1, r2); + exit(1); + } + nbTests++; +} + + +static void BMK_checkResult64(U64 r1, U64 r2) +{ + static int nbTests = 1; + + if (r1!=r2) + { + DISPLAY("\rERROR : Test%3i : 64-bits values non equals !!!!! \n", nbTests); + DISPLAY("\r %08X%08X != %08X%08X \n", (U32)(r1>>32), (U32)r1, (U32)(r2>>32), (U32)r2); + exit(1); + } + nbTests++; +} + + +static void BMK_testSequence64(void* sentence, int len, U64 seed, U64 Nresult) +{ + XXH64_CREATESTATE_STATIC(state); + U64 Dresult; + int index; + + Dresult = XXH64(sentence, len, seed); + BMK_checkResult64(Dresult, Nresult); + + XXH64_reset(state, seed); + XXH64_update(state, sentence, len); + Dresult = XXH64_digest(state); + BMK_checkResult64(Dresult, Nresult); + + XXH64_reset(state, seed); + for (index=0; index<len; index++) XXH64_update(state, ((char*)sentence)+index, 1); + Dresult = XXH64_digest(state); + BMK_checkResult64(Dresult, Nresult); +} + + +static void BMK_testSequence(const void* sequence, size_t len, U32 seed, U32 Nresult) +{ + XXH32_CREATESTATE_STATIC(state); + U32 Dresult; + size_t index; + + Dresult = XXH32(sequence, len, seed); + BMK_checkResult(Dresult, Nresult); + + XXH32_reset(state, seed); + XXH32_update(state, sequence, len); + Dresult = XXH32_digest(state); + BMK_checkResult(Dresult, Nresult); + + XXH32_reset(state, seed); + for (index=0; index<len; index++) XXH32_update(state, ((const char*)sequence)+index, 1); + Dresult = XXH32_digest(state); + BMK_checkResult(Dresult, Nresult); +} + + +#define SANITY_BUFFER_SIZE 101 +static void BMK_sanityCheck(void) +{ + BYTE sanityBuffer[SANITY_BUFFER_SIZE]; + int i; + static const U32 prime = 2654435761U; + U32 byteGen = prime; + + + for 
(i=0; i<SANITY_BUFFER_SIZE; i++) + { + sanityBuffer[i] = (BYTE)(byteGen>>24); + byteGen *= byteGen; + } + + BMK_testSequence(NULL, 0, 0, 0x02CC5D05); + BMK_testSequence(NULL, 0, prime, 0x36B78AE7); + BMK_testSequence(sanityBuffer, 1, 0, 0xB85CBEE5); + BMK_testSequence(sanityBuffer, 1, prime, 0xD5845D64); + BMK_testSequence(sanityBuffer, 14, 0, 0xE5AA0AB4); + BMK_testSequence(sanityBuffer, 14, prime, 0x4481951D); + BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, 0, 0x1F1AA412); + BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0x498EC8E2); + + BMK_testSequence64(NULL , 0, 0, 0xEF46DB3751D8E999ULL); + BMK_testSequence64(NULL , 0, prime, 0xAC75FDA2929B17EFULL); + BMK_testSequence64(sanityBuffer, 1, 0, 0x4FCE394CC88952D8ULL); + BMK_testSequence64(sanityBuffer, 1, prime, 0x739840CB819FA723ULL); + BMK_testSequence64(sanityBuffer, 14, 0, 0xCFFA8DB881BC3A3DULL); + BMK_testSequence64(sanityBuffer, 14, prime, 0x5B9611585EFCC9CBULL); + BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, 0, 0x0EAB543384F878ADULL); + BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0xCAA65939306F1E21ULL); + + DISPLAY("\r%79s\r", ""); /* Clean display line */ + DISPLAYLEVEL(2, "Sanity check -- all tests ok\n"); +} + + +/* ******************************************************** +* File Hashing +**********************************************************/ + +static void BMK_display_LittleEndian(const void* ptr, size_t length) +{ + const BYTE* p = (const BYTE*)ptr; + size_t index; + for (index=length-1; index<length; index--) /* intentional underflow to negative to detect end */ + DISPLAYRESULT("%02x", p[index]); +} + +static void BMK_display_BigEndian(const void* ptr, size_t length) +{ + const BYTE* p = (const BYTE*)ptr; + size_t index; + for (index=0; index<length; index++) + DISPLAYRESULT("%02x", p[index]); +} + +static void BMK_hashStream(void* xxhHashValue, const algoType hashType, FILE* inFile, void* buffer, size_t blockSize) +{ + XXH64_CREATESTATE_STATIC(state64); + 
XXH32_CREATESTATE_STATIC(state32); + size_t readSize; + + /* Init */ + XXH32_reset(state32, XXHSUM32_DEFAULT_SEED); + XXH64_reset(state64, XXHSUM64_DEFAULT_SEED); + + /* Load file & update hash */ + readSize = 1; + while (readSize) + { + readSize = fread(buffer, 1, blockSize, inFile); + switch(hashType) + { + case algo_xxh32: + XXH32_update(state32, buffer, readSize); + break; + case algo_xxh64: + XXH64_update(state64, buffer, readSize); + break; + default: + break; + } + } + + switch(hashType) + { + case algo_xxh32: + { + U32 h32 = XXH32_digest(state32); + memcpy(xxhHashValue, &h32, sizeof(h32)); + break; + } + case algo_xxh64: + { + U64 h64 = XXH64_digest(state64); + memcpy(xxhHashValue, &h64, sizeof(h64)); + break; + } + default: + break; + } +} + + +typedef enum { big_endian, little_endian} endianess; + +static int BMK_hash(const char* fileName, + const algoType hashType, + const endianess displayEndianess) +{ + FILE* inFile; + size_t const blockSize = 64 KB; + void* buffer; + U32 h32 = 0; + U64 h64 = 0; + + /* Check file existence */ + if (fileName == stdinName) + { + inFile = stdin; + SET_BINARY_MODE(stdin); + } + else + inFile = fopen( fileName, "rb" ); + if (inFile==NULL) + { + DISPLAY( "Pb opening %s\n", fileName); + return 1; + } + + /* Memory allocation & restrictions */ + buffer = malloc(blockSize); + if(!buffer) + { + DISPLAY("\nError: not enough memory!\n"); + fclose(inFile); + return 1; + } + + /* loading notification */ + { + const size_t fileNameSize = strlen(fileName); + const char* const fileNameEnd = fileName + fileNameSize; + const size_t maxInfoFilenameSize = fileNameSize > 30 ? 30 : fileNameSize; + size_t infoFilenameSize = 1; + while ( (infoFilenameSize < maxInfoFilenameSize) + &&(fileNameEnd[-1-infoFilenameSize] != '/') + &&(fileNameEnd[-1-infoFilenameSize] != '\\') ) + infoFilenameSize++; + DISPLAY("\rLoading %s... 
\r", fileNameEnd - infoFilenameSize); + } + + /* Load file & update hash */ + switch(hashType) + { + case algo_xxh32: + BMK_hashStream(&h32, hashType, inFile, buffer, blockSize); + break; + case algo_xxh64: + BMK_hashStream(&h64, hashType, inFile, buffer, blockSize); + break; + default: + break; + } + + fclose(inFile); + free(buffer); + + /* display Hash */ + switch(hashType) + { + case algo_xxh32: + { + XXH32_canonical_t hcbe32; + XXH32_canonicalFromHash(&hcbe32, h32); + displayEndianess==big_endian ? + BMK_display_BigEndian(&hcbe32, sizeof(hcbe32)) : BMK_display_LittleEndian(&hcbe32, sizeof(hcbe32)); + DISPLAYRESULT(" %s\n", fileName); + break; + } + case algo_xxh64: + { + XXH64_canonical_t hcbe64; + XXH64_canonicalFromHash(&hcbe64, h64); + displayEndianess==big_endian ? + BMK_display_BigEndian(&hcbe64, sizeof(hcbe64)) : BMK_display_LittleEndian(&hcbe64, sizeof(hcbe64)); + DISPLAYRESULT(" %s\n", fileName); + break; + } + default: + break; + } + + return 0; +} + + +static int BMK_hashFiles(const char** fnList, int fnTotal, + algoType hashType, endianess displayEndianess) +{ + int fnNb; + int result = 0; + + if (fnTotal==0) + return BMK_hash(stdinName, hashType, displayEndianess); + + for (fnNb=0; fnNb<fnTotal; fnNb++) + result += BMK_hash(fnList[fnNb], hashType, displayEndianess); + DISPLAY("\r%70s\r", ""); + return result; +} + + +typedef enum { + GetLine_ok, + GetLine_eof, + GetLine_exceedMaxLineLength, + GetLine_outOfMemory, +} GetLineResult; + +typedef enum { + CanonicalFromString_ok, + CanonicalFromString_invalidFormat, +} CanonicalFromStringResult; + +typedef enum { + ParseLine_ok, + ParseLine_invalidFormat, +} ParseLineResult; + +typedef enum { + LineStatus_hashOk, + LineStatus_hashFailed, + LineStatus_failedToOpen, +} LineStatus; + +typedef union { + XXH32_canonical_t xxh32; + XXH64_canonical_t xxh64; +} Canonical; + +typedef struct { + Canonical canonical; + const char* filename; + int xxhBits; /* canonical type : 32:xxh32, 64:xxh64 */ +} ParsedLine; + 
+typedef struct { + unsigned long nProperlyFormattedLines; + unsigned long nImproperlyFormattedLines; + unsigned long nMismatchedChecksums; + unsigned long nOpenOrReadFailures; + unsigned long nMixedFormatLines; + int xxhBits; + int quit; +} ParseFileReport; + +typedef struct { + const char* inFileName; + FILE* inFile; + int lineMax; + char* lineBuf; + size_t blockSize; + char* blockBuf; + int strictMode; + int statusOnly; + int warn; + int quiet; + ParseFileReport report; +} ParseFileArg; + + +/* Read line from stream. + Returns GetLine_ok, if it reads line successfully. + Returns GetLine_eof, if stream reaches EOF. + Returns GetLine_exceedMaxLineLength, if line length is longer than MAX_LINE_LENGTH. + Returns GetLine_outOfMemory, if line buffer memory allocation failed. + */ +static GetLineResult getLine(char** lineBuf, int* lineMax, FILE* inFile) +{ + GetLineResult result = GetLine_ok; + int len = 0; + + if (*lineBuf == NULL || *lineMax < 1) + { + *lineMax = DEFAULT_LINE_LENGTH; + *lineBuf = (char*) realloc(*lineBuf, *lineMax); + if(*lineBuf == NULL) return GetLine_outOfMemory; + } + + for (;;) + { + const int c = fgetc(inFile); + if (c == EOF) + { + /* If we meet EOF before first character, returns GetLine_eof, + * otherwise GetLine_ok. + */ + if (len == 0) + { + result = GetLine_eof; + } + break; + } + + /* Make enough space for len+1 (for final NUL) bytes. */ + if (len+1 >= *lineMax) + { + char* newLineBuf = NULL; + int newBufSize = *lineMax; + + newBufSize += (newBufSize/2) + 1; /* x 1.5 */ + if (newBufSize > MAX_LINE_LENGTH) newBufSize = MAX_LINE_LENGTH; + if (len+1 >= newBufSize) return GetLine_exceedMaxLineLength; + + newLineBuf = (char*) realloc(*lineBuf, newBufSize); + if (newLineBuf == NULL) return GetLine_outOfMemory; + + *lineBuf = newLineBuf; + *lineMax = newBufSize; + } + + if (c == '\n') + { + break; + } + (*lineBuf)[len++] = (char) c; + } + + (*lineBuf)[len] = '\0'; + return result; +} + + +/* Converts one hexadecimal character to integer. 
+ * Returns -1, if given character is not hexadecimal. + */ +static int charToHex(char c) +{ + int result = -1; + if (c >= '0' && c <= '9') { + result = (int) (c - '0'); + } else if (c >= 'A' && c <= 'F') { + result = (int) (c - 'A') + 0x0a; + } else if (c >= 'a' && c <= 'f') { + result = (int) (c - 'a') + 0x0a; + } + return result; +} + + +/* Converts XXH32 canonical hexadecimal string hashStr to big endian unsigned char array dst. + * Returns CANONICAL_FROM_STRING_INVALID_FORMAT, if hashStr is not well formatted. + * Returns CANONICAL_FROM_STRING_OK, if hashStr is parsed successfully. + */ +static CanonicalFromStringResult canonicalFromString(unsigned char* dst, + size_t dstSize, + const char* hashStr) +{ + size_t i; + for (i = 0; i < dstSize; ++i) + { + int h0, h1; + + h0 = charToHex(hashStr[i*2 + 0]); + if (h0 < 0) + { + return CanonicalFromString_invalidFormat; + } + + h1 = charToHex(hashStr[i*2 + 1]); + if (h1 < 0) + { + return CanonicalFromString_invalidFormat; + } + + dst[i] = (unsigned char) ((h0 << 4) | h1); + } + return CanonicalFromString_ok; +} + + +/* Parse single line of xxHash checksum file. + * Returns PARSE_LINE_ERROR_INVALID_FORMAT, if line is not well formatted. + * Returns PARSE_LINE_OK if line is parsed successfully. + * And members of parseLine will be filled by parsed values. + * + * - line must be ended with '\0'. + * - Since parsedLine.filename will point within given argument `line`, + * users must keep `line`s content during they are using parsedLine. 
+ * + * Given xxHash checksum line should have the following format: + * + * <8 or 16 hexadecimal char> <space> <space> <filename...> <'\0'> + */ +static ParseLineResult parseLine(ParsedLine* parsedLine, const char* line) +{ + const char* const firstSpace = strchr(line, ' '); + const char* const secondSpace = firstSpace + 1; + + parsedLine->filename = NULL; + parsedLine->xxhBits = 0; + + if (firstSpace == NULL || *secondSpace != ' ') + { + return ParseLine_invalidFormat; + } + + switch (firstSpace - line) + { + case 8: + { + XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32; + if (canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), line) + != CanonicalFromString_ok) + { + return ParseLine_invalidFormat; + } + parsedLine->xxhBits = 32; + break; + } + + case 16: + { + XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64; + if (canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), line) + != CanonicalFromString_ok) + { + return ParseLine_invalidFormat; + } + parsedLine->xxhBits = 64; + break; + } + + default: + return ParseLine_invalidFormat; + break; + } + + parsedLine->filename = secondSpace + 1; + return ParseLine_ok; +} + + +/* Parse xxHash checksum file. + */ +static void parseFile1(ParseFileArg* parseFileArg) +{ + const char* const inFileName = parseFileArg->inFileName; + ParseFileReport* const report = &parseFileArg->report; + + unsigned long lineNumber = 0; + memset(report, 0, sizeof(*report)); + + while (!report->quit) + { + FILE* fp = NULL; + LineStatus lineStatus = LineStatus_hashFailed; + GetLineResult getLineResult; + ParsedLine parsedLine; + memset(&parsedLine, 0, sizeof(parsedLine)); + + lineNumber++; + if (lineNumber == 0) + { + /* This is unlikely happen, but md5sum.c has this + * error check. 
*/ + DISPLAY("%s : too many checksum lines\n", inFileName); + report->quit = 1; + break; + } + + getLineResult = getLine(&parseFileArg->lineBuf, &parseFileArg->lineMax, + parseFileArg->inFile); + if (getLineResult != GetLine_ok) + { + if (getLineResult == GetLine_eof) + { + break; + } + + switch (getLineResult) + { + case GetLine_ok: + case GetLine_eof: + /* These cases never happen. See above getLineResult related "if"s. + They exist just for make gcc's -Wswitch-enum happy. */ + break; + + default: + DISPLAY("%s : %lu: unknown error\n", inFileName, lineNumber); + break; + + case GetLine_exceedMaxLineLength: + DISPLAY("%s : %lu: too long line\n", inFileName, lineNumber); + break; + + case GetLine_outOfMemory: + DISPLAY("%s : %lu: out of memory\n", inFileName, lineNumber); + break; + } + report->quit = 1; + break; + } + + if (parseLine(&parsedLine, parseFileArg->lineBuf) != ParseLine_ok) + { + report->nImproperlyFormattedLines++; + if (parseFileArg->warn) + { + DISPLAY("%s : %lu: improperly formatted XXHASH checksum line\n" + , inFileName, lineNumber); + } + continue; + } + + if (report->xxhBits != 0 && report->xxhBits != parsedLine.xxhBits) + { + /* Don't accept xxh32/xxh64 mixed file */ + report->nImproperlyFormattedLines++; + report->nMixedFormatLines++; + if (parseFileArg->warn) + { + DISPLAY("%s : %lu: improperly formatted XXHASH checksum line (XXH32/64)\n" + , inFileName, lineNumber); + } + continue; + } + + report->nProperlyFormattedLines++; + if (report->xxhBits == 0) + { + report->xxhBits = parsedLine.xxhBits; + } + + fp = fopen(parsedLine.filename, "rb"); + if (fp == NULL) + { + lineStatus = LineStatus_failedToOpen; + } + else + { + lineStatus = LineStatus_hashFailed; + switch (parsedLine.xxhBits) + { + case 32: + { + XXH32_hash_t xxh; + BMK_hashStream(&xxh, algo_xxh32, fp, parseFileArg->blockBuf, parseFileArg->blockSize); + if (xxh == XXH32_hashFromCanonical(&parsedLine.canonical.xxh32)) + { + lineStatus = LineStatus_hashOk; + } + } + break; + + case 64: 
+ { + XXH64_hash_t xxh; + BMK_hashStream(&xxh, algo_xxh64, fp, parseFileArg->blockBuf, parseFileArg->blockSize); + if (xxh == XXH64_hashFromCanonical(&parsedLine.canonical.xxh64)) + { + lineStatus = LineStatus_hashOk; + } + } + break; + + default: + break; + } + fclose(fp); + } + + switch (lineStatus) + { + default: + DISPLAY("%s : unknown error\n", inFileName); + report->quit = 1; + break; + + case LineStatus_failedToOpen: + report->nOpenOrReadFailures++; + if (!parseFileArg->statusOnly) + { + DISPLAYRESULT("%s : %lu: FAILED open or read %s\n" + , inFileName, lineNumber, parsedLine.filename); + } + break; + + case LineStatus_hashOk: + case LineStatus_hashFailed: + { + int b = 1; + if (lineStatus == LineStatus_hashOk) + { + /* If --quiet is specified, don't display "OK" */ + if (parseFileArg->quiet) + { + b = 0; + } + } + else + { + report->nMismatchedChecksums++; + } + + if (b && !parseFileArg->statusOnly) + { + DISPLAYRESULT("%s: %s\n", parsedLine.filename + , lineStatus == LineStatus_hashOk ? "OK" : "FAILED"); + } + } + break; + } + } +} + + +/* Parse xxHash checksum file. + * Returns 1, if all procedures were succeeded. + * Returns 0, if any procedures was failed. + * + * If strictMode != 0, return error code if any line is invalid. + * If statusOnly != 0, don't generate any output. + * If warn != 0, print a warning message to stderr. + * If quiet != 0, suppress "OK" line. + * + * "All procedures are succeeded" means: + * - Checksum file contains at least one line and less than SIZE_T_MAX lines. + * - All files are properly opened and read. + * - All hash values match with its content. + * - (strict mode) All lines in checksum file are consistent and well formatted. 
+ * + */ +static int checkFile(const char* inFileName, + const endianess displayEndianess, + U32 strictMode, + U32 statusOnly, + U32 warn, + U32 quiet) +{ + int result = 0; + FILE* inFile = NULL; + ParseFileArg parseFileArgBody; + ParseFileArg* const parseFileArg = &parseFileArgBody; + ParseFileReport* const report = &parseFileArg->report; + + if (displayEndianess != big_endian) + { + /* Don't accept little endian */ + DISPLAY( "Check file mode doesn't support little endian\n" ); + return 0; + } + + /* note : stdinName is special constant pointer. It is not a string. */ + if (inFileName == stdinName) + { + /* note : Since we expect text input for xxhash -c mode, + * Don't set binary mode for stdin */ + inFile = stdin; + } + else + { + inFile = fopen( inFileName, "rt" ); + } + + if (inFile == NULL) + { + DISPLAY( "Pb opening %s\n", inFileName); + return 0; + } + + parseFileArg->inFileName = inFileName; + parseFileArg->inFile = inFile; + parseFileArg->lineMax = DEFAULT_LINE_LENGTH; + parseFileArg->lineBuf = (char*) malloc((size_t) parseFileArg->lineMax); + parseFileArg->blockSize = 64 * 1024; + parseFileArg->blockBuf = (char*) malloc(parseFileArg->blockSize); + parseFileArg->strictMode = strictMode; + parseFileArg->statusOnly = statusOnly; + parseFileArg->warn = warn; + parseFileArg->quiet = quiet; + + parseFile1(parseFileArg); + + free(parseFileArg->blockBuf); + free(parseFileArg->lineBuf); + + if (inFile != stdin) + { + fclose(inFile); + } + + /* Show error/warning messages. 
All messages are copied from md5sum.c + */ + if (report->nProperlyFormattedLines == 0) + { + DISPLAY("%s: no properly formatted XXHASH checksum lines found\n", inFileName); + } + else if (!statusOnly) + { + if (report->nImproperlyFormattedLines) + { + DISPLAYRESULT("%lu lines are improperly formatted\n" + , report->nImproperlyFormattedLines); + } + + if (report->nOpenOrReadFailures) + { + DISPLAYRESULT("%lu listed files could not be read\n" + , report->nOpenOrReadFailures); + } + + if (report->nMismatchedChecksums) + { + DISPLAYRESULT("%lu computed checksums did NOT match\n" + , report->nMismatchedChecksums); + } + } + + /* Result (exit) code logic is copied from + * gnu coreutils/src/md5sum.c digest_check() */ + result = report->nProperlyFormattedLines != 0 + && report->nMismatchedChecksums == 0 + && report->nOpenOrReadFailures == 0 + && (!strictMode || report->nImproperlyFormattedLines == 0) + && report->quit == 0; + return result; +} + + +static int checkFiles(const char** fnList, int fnTotal, + const endianess displayEndianess, + U32 strictMode, + U32 statusOnly, + U32 warn, + U32 quiet) +{ + int ok = 1; + + /* Special case for stdinName "-", + * note: stdinName is not a string. It's special pointer. */ + if (fnTotal==0) + { + ok &= checkFile(stdinName, displayEndianess, strictMode, statusOnly, warn, quiet); + } + else + { + int fnNb; + for (fnNb=0; fnNb<fnTotal; fnNb++) + ok &= checkFile(fnList[fnNb], displayEndianess, strictMode, statusOnly, warn, quiet); + } + return ok ? 
0 : 1; +} + + +/* ******************************************************** +* Main +**********************************************************/ + +static int usage(const char* exename) +{ + DISPLAY( WELCOME_MESSAGE ); + DISPLAY( "Usage :\n"); + DISPLAY( " %s [arg] [filenames]\n", exename); + DISPLAY( "When no filename provided, or - provided : use stdin as input\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -H# : hash selection : 0=32bits, 1=64bits (default: %i)\n", (int)g_defaultAlgo); + DISPLAY( " -c : read xxHash sums from the [filenames] and check them\n"); + DISPLAY( " -h : help \n"); + return 0; +} + + +static int usage_advanced(const char* exename) +{ + usage(exename); + DISPLAY( "Advanced :\n"); + DISPLAY( " --little-endian : hash printed using little endian convention (default: big endian)\n"); + DISPLAY( " -V, --version : display version\n"); + DISPLAY( " -h, --help : display long help and exit\n"); + DISPLAY( " -b : benchmark mode \n"); + DISPLAY( " -i# : number of iterations (benchmark mode; default %i)\n", g_nbIterations); + DISPLAY( "\n"); + DISPLAY( "The following four options are useful only when verifying checksums (-c):\n"); + DISPLAY( "--strict : don't print OK for each successfully verified file\n"); + DISPLAY( "--status : don't output anything, status code shows success\n"); + DISPLAY( "--quiet : exit non-zero for improperly formatted checksum lines\n"); + DISPLAY( "--warn : warn about improperly formatted checksum lines\n"); + return 0; +} + +static int badusage(const char* exename) +{ + DISPLAY("Wrong parameters\n"); + usage(exename); + return 1; +} + + +int main(int argc, const char** argv) +{ + int i, filenamesStart=0; + const char* exename = argv[0]; + U32 benchmarkMode = 0; + U32 fileCheckMode = 0; + U32 strictMode = 0; + U32 statusOnly = 0; + U32 warn = 0; + U32 quiet = 0; + algoType algo = g_defaultAlgo; + endianess displayEndianess = big_endian; + + /* special case : xxh32sum default to 32 bits checksum */ + if (strstr(exename, 
"xxh32sum") != NULL) algo = algo_xxh32; + + for(i=1; i<argc; i++) + { + const char* argument = argv[i]; + + if(!argument) continue; /* Protection, if argument empty */ + + if (!strcmp(argument, "--little-endian")) { displayEndianess = little_endian; continue; } + if (!strcmp(argument, "--check")) { fileCheckMode = 1; continue; } + if (!strcmp(argument, "--strict")) { strictMode = 1; continue; } + if (!strcmp(argument, "--status")) { statusOnly = 1; continue; } + if (!strcmp(argument, "--quiet")) { quiet = 1; continue; } + if (!strcmp(argument, "--warn")) { warn = 1; continue; } + if (!strcmp(argument, "--help")) { return usage_advanced(exename); } + if (!strcmp(argument, "--version")) { DISPLAY(WELCOME_MESSAGE); return 0; } + + if (*argument!='-') + { + if (filenamesStart==0) filenamesStart=i; /* only supports a continuous list of filenames */ + continue; + } + + /* command selection */ + argument++; /* note : *argument=='-' */ + + while (*argument!=0) + { + switch(*argument) + { + /* Display version */ + case 'V': + DISPLAY(WELCOME_MESSAGE); return 0; + + /* Display help on usage */ + case 'h': + return usage_advanced(exename); + + /* select hash algorithm */ + case 'H': + algo = (algoType)(argument[1] - '0'); + argument+=2; + break; + + /* File check mode */ + case 'c': + fileCheckMode=1; + argument++; + break; + + /* Warning mode (file check mode only, alias of "--warning") */ + case 'w': + warn=1; + argument++; + break; + + /* Trigger benchmark mode */ + case 'b': + argument++; + benchmarkMode=1; + break; + + /* Modify Nb Iterations (benchmark only) */ + case 'i': + g_nbIterations = argument[1] - '0'; + argument+=2; + break; + + /* Modify Block size (benchmark only) */ + case 'B': + argument++; + g_sampleSize = 0; + while (argument[0]>='0' && argument[0]<='9') + g_sampleSize *= 10, g_sampleSize += argument[0]-'0', argument++; + break; + + default: + return badusage(exename); + } + } + } + + /* Check benchmark mode */ + if (benchmarkMode) + { + DISPLAY( 
WELCOME_MESSAGE ); + BMK_sanityCheck(); + if (filenamesStart==0) return BMK_benchInternal(); + return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart); + } + + /* Check if input is defined as console; trigger an error in this case */ + if ( (filenamesStart==0) && IS_CONSOLE(stdin) ) return badusage(exename); + + if (filenamesStart==0) filenamesStart = argc; + if (fileCheckMode) + { + return checkFiles(argv+filenamesStart, argc-filenamesStart, displayEndianess, strictMode, statusOnly, warn, quiet); + } + else + { + return BMK_hashFiles(argv+filenamesStart, argc-filenamesStart, algo, displayEndianess); + } +} |