From d4d7f315c7101cc999e110ac5433ce326849eb17 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 15:46:33 +0100 Subject: Merging upstream version 1.4~rc1. Signed-off-by: Daniel Baumann --- 7zFile.c | 240 --- 7zFile.h | 80 - 7zStream.c | 164 -- Alloc.c | 127 -- Alloc.h | 38 - ChangeLog | 16 +- INSTALL | 11 +- LzFind.c | 1384 ++++++++--------- LzFind.h | 189 +-- LzHash.h | 54 - LzmaDec.c | 1898 ++++++++++++------------ LzmaDec.h | 281 ++-- LzmaEnc.c | 3780 +++++++++++++++++++++-------------------------- LzmaEnc.h | 77 +- Makefile.in | 44 +- NEWS | 20 +- README | 13 +- Types.h | 225 --- carg_parser.c | 277 ++++ carg_parser.h | 85 ++ clzip.h | 223 +++ configure | 95 +- doc/pdlzip.1 | 20 +- main.c | 1787 ++++++++++++---------- pdarg_parser.c | 277 ---- pdarg_parser.h | 85 -- pdlzip.h | 190 --- testsuite/check.sh | 39 +- testsuite/test.lz | Bin 11540 -> 0 bytes testsuite/test.lzma | Bin 11535 -> 0 bytes testsuite/test.txt.lz | Bin 0 -> 11518 bytes testsuite/test.txt.lzma | Bin 0 -> 11505 bytes 32 files changed, 5231 insertions(+), 6488 deletions(-) delete mode 100644 7zFile.c delete mode 100644 7zFile.h delete mode 100644 7zStream.c delete mode 100644 Alloc.c delete mode 100644 Alloc.h delete mode 100644 LzHash.h delete mode 100644 Types.h create mode 100644 carg_parser.c create mode 100644 carg_parser.h create mode 100644 clzip.h delete mode 100644 pdarg_parser.c delete mode 100644 pdarg_parser.h delete mode 100644 pdlzip.h delete mode 100644 testsuite/test.lz delete mode 100644 testsuite/test.lzma create mode 100644 testsuite/test.txt.lz create mode 100644 testsuite/test.txt.lzma diff --git a/7zFile.c b/7zFile.c deleted file mode 100644 index 71c4fee..0000000 --- a/7zFile.c +++ /dev/null @@ -1,240 +0,0 @@ -/* 7zFile.c -- File IO -2009-11-24 : Igor Pavlov : Public domain */ - -#define _FILE_OFFSET_BITS 64 - -#include "7zFile.h" - -#ifndef USE_WINDOWS_FILE - -#ifndef UNDER_CE -#include -#endif - -#else - -/* - ReadFile and WriteFile functions in Windows have BUG: - If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1) - from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES - (Insufficient system resources exist to complete the requested service). - Probably in some version of Windows there are problems with other sizes: - for 32 MB (maybe also for 16 MB). - And message can be "Network connection was lost" -*/ - -#define kChunkSizeMax (1 << 22) - -#endif - -void File_Construct(CSzFile *p) -{ - #ifdef USE_WINDOWS_FILE - p->handle = INVALID_HANDLE_VALUE; - #else - p->file = NULL; - #endif -} - -#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE) -static WRes File_Open(CSzFile *p, const char *name, int writeMode) -{ - #ifdef USE_WINDOWS_FILE - p->handle = CreateFileA(name, - writeMode ? GENERIC_WRITE : GENERIC_READ, - FILE_SHARE_READ, NULL, - writeMode ? CREATE_ALWAYS : OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, NULL); - return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); - #else - if( !name[0] ) p->file = writeMode ? stdout : stdin; - else p->file = fopen(name, writeMode ? "wb+" : "rb"); - return (p->file != 0) ? 0 : - #ifdef UNDER_CE - 2; /* ENOENT */ - #else - errno; - #endif - #endif -} - -WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); } -WRes OutFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 1); } -#endif - -#ifdef USE_WINDOWS_FILE -static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode) -{ - p->handle = CreateFileW(name, - writeMode ? GENERIC_WRITE : GENERIC_READ, - FILE_SHARE_READ, NULL, - writeMode ? CREATE_ALWAYS : OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, NULL); - return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); -} -WRes InFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 0); } -WRes OutFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 1); } -#endif - -WRes File_Close(CSzFile *p) -{ - #ifdef USE_WINDOWS_FILE - if (p->handle != INVALID_HANDLE_VALUE) - { - if (!CloseHandle(p->handle)) - return GetLastError(); - p->handle = INVALID_HANDLE_VALUE; - } - #else - if (p->file != NULL) - { - int res = fclose(p->file); - if (res != 0) - return res; - p->file = NULL; - } - #endif - return 0; -} - -WRes File_Read(CSzFile *p, void *data, size_t *size) -{ - size_t originalSize = *size; - if (originalSize == 0) - return 0; - - #ifdef USE_WINDOWS_FILE - - *size = 0; - do - { - DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; - DWORD processed = 0; - BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); - data = (void *)((Byte *)data + processed); - originalSize -= processed; - *size += processed; - if (!res) - return GetLastError(); - if (processed == 0) - break; - } - while (originalSize > 0); - return 0; - - #else - - *size = fread(data, 1, originalSize, p->file); - if (*size == originalSize) - return 0; - return ferror(p->file); - - #endif -} - -WRes File_Write(CSzFile *p, const void *data, size_t *size) -{ - size_t originalSize = *size; - if (originalSize == 0) - return 0; - - #ifdef USE_WINDOWS_FILE - - *size = 0; - do - { - DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; - DWORD processed = 0; - BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); - data = (void *)((Byte *)data + processed); - originalSize -= processed; - *size += processed; - if (!res) - return GetLastError(); - if (processed == 0) - break; - } - while (originalSize > 0); - return 0; - - #else - - *size = fwrite(data, 1, originalSize, p->file); - if (*size == originalSize) - return 0; - return ferror(p->file); - - #endif -} - -WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin) -{ - #ifdef USE_WINDOWS_FILE - - LARGE_INTEGER value; - DWORD moveMethod; - value.LowPart = (DWORD)*pos; - value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ - switch (origin) - { - case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break; - case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break; - case SZ_SEEK_END: moveMethod = FILE_END; break; - default: return ERROR_INVALID_PARAMETER; - } - value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod); - if (value.LowPart == 0xFFFFFFFF) - { - WRes res = GetLastError(); - if (res != NO_ERROR) - return res; - } - *pos = ((Int64)value.HighPart << 32) | value.LowPart; - return 0; - - #else - - int moveMethod; - int res; - switch (origin) - { - case SZ_SEEK_SET: moveMethod = SEEK_SET; break; - case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break; - case SZ_SEEK_END: moveMethod = SEEK_END; break; - default: return 1; - } - res = fseek(p->file, (long)*pos, moveMethod); - *pos = ftell(p->file); - return res; - - #endif -} - - -/* ---------- FileSeqInStream ---------- */ - -static SRes FileSeqInStream_Read(void *pp, void *buf, size_t *size) -{ - CFileSeqInStream *p = (CFileSeqInStream *)pp; - return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ; -} - -void FileSeqInStream_CreateVTable(CFileSeqInStream *p) -{ - p->s.Read = FileSeqInStream_Read; -} - - -/* ---------- FileOutStream ---------- */ - -static size_t FileOutStream_Write(void *pp, const void *data, size_t size) -{ - CFileOutStream *p = (CFileOutStream *)pp; - File_Write(&p->file, data, &size); - return size; -} - -void FileOutStream_CreateVTable(CFileOutStream *p) -{ - p->s.Write = FileOutStream_Write; -} diff --git a/7zFile.h b/7zFile.h deleted file mode 100644 index edda354..0000000 --- a/7zFile.h +++ /dev/null @@ -1,80 +0,0 @@ -/* 7zFile.h -- File IO -2009-11-24 : Igor Pavlov : Public domain */ - -#ifndef __7Z_FILE_H -#define __7Z_FILE_H - -#ifdef _WIN32 -#define USE_WINDOWS_FILE -#endif - -#ifdef USE_WINDOWS_FILE -#include -#else -#include -#endif - -#include "Types.h" - -EXTERN_C_BEGIN - -/* ---------- File ---------- */ - -typedef struct -{ - #ifdef USE_WINDOWS_FILE - HANDLE handle; - #else - FILE *file; - #endif -} CSzFile; - -void File_Construct(CSzFile *p); -#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE) -WRes InFile_Open(CSzFile *p, const char *name); -WRes OutFile_Open(CSzFile *p, const char *name); -#endif -#ifdef USE_WINDOWS_FILE -WRes InFile_OpenW(CSzFile *p, const WCHAR *name); -WRes OutFile_OpenW(CSzFile *p, const WCHAR *name); -#endif -WRes File_Close(CSzFile *p); - -/* reads max(*size, remain file's size) bytes */ -WRes File_Read(CSzFile *p, void *data, size_t *size); - -/* writes *size bytes */ -WRes File_Write(CSzFile *p, const void *data, size_t *size); - -WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin); - - -/* ---------- FileInStream ---------- */ - -typedef struct -{ - ISeqInStream s; - CSzFile file; -} CFileSeqInStream; - -void FileSeqInStream_CreateVTable(CFileSeqInStream *p); - - -typedef struct -{ - ISeekInStream s; - CSzFile file; -} CFileInStream; - - -typedef struct -{ - ISeqOutStream s; - CSzFile file; -} CFileOutStream; - -void FileOutStream_CreateVTable(CFileOutStream *p); - -EXTERN_C_END - -#endif diff --git a/7zStream.c b/7zStream.c deleted file mode 100644 index 12aa3a3..0000000 --- a/7zStream.c +++ /dev/null @@ -1,164 +0,0 @@ -/* 7zStream.c -- 7z Stream functions -2008-11-23 : Igor Pavlov : Public domain */ - -#define _FILE_OFFSET_BITS 64 - -#include - -#include "Types.h" - -SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType) -{ - while (size != 0) - { - size_t processed = size; - RINOK(stream->Read(stream, buf, &processed)); - if (processed == 0) - return errorType; - buf = (void *)((Byte *)buf + processed); - size -= processed; - } - return SZ_OK; -} - -SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size) -{ - return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); -} - -SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset) -{ - Int64 t = offset; - return stream->Seek(stream, &t, SZ_SEEK_SET); -} - -SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size) -{ - void *lookBuf; - if (*size == 0) - return SZ_OK; - RINOK(stream->Look(stream, &lookBuf, size)); - memcpy(buf, lookBuf, *size); - return stream->Skip(stream, *size); -} - -SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType) -{ - while (size != 0) - { - size_t processed = size; - RINOK(stream->Read(stream, buf, &processed)); - if (processed == 0) - return errorType; - buf = (void *)((Byte *)buf + processed); - size -= processed; - } - return SZ_OK; -} - -SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size) -{ - return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); -} - -static SRes LookToRead_Look_Lookahead(void *pp, void **buf, size_t *size) -{ - SRes res = SZ_OK; - CLookToRead *p = (CLookToRead *)pp; - size_t size2 = p->size - p->pos; - if (size2 == 0 && *size > 0) - { - p->pos = 0; - size2 = LookToRead_BUF_SIZE; - res = p->realStream->Read(p->realStream, p->buf, &size2); - p->size = size2; - } - if (size2 < *size) - *size = size2; - *buf = p->buf + p->pos; - return res; -} - -static SRes LookToRead_Look_Exact(void *pp, void **buf, size_t *size) -{ - SRes res = SZ_OK; - CLookToRead *p = (CLookToRead *)pp; - size_t size2 = p->size - p->pos; - if (size2 == 0 && *size > 0) - { - p->pos = 0; - if (*size > LookToRead_BUF_SIZE) - *size = LookToRead_BUF_SIZE; - res = p->realStream->Read(p->realStream, p->buf, size); - size2 = p->size = *size; - } - if (size2 < *size) - *size = size2; - *buf = p->buf + p->pos; - return res; -} - -static SRes LookToRead_Skip(void *pp, size_t offset) -{ - CLookToRead *p = (CLookToRead *)pp; - p->pos += offset; - return SZ_OK; -} - -static SRes LookToRead_Read(void *pp, void *buf, size_t *size) -{ - CLookToRead *p = (CLookToRead *)pp; - size_t rem = p->size - p->pos; - if (rem == 0) - return p->realStream->Read(p->realStream, buf, size); - if (rem > *size) - rem = *size; - memcpy(buf, p->buf + p->pos, rem); - p->pos += rem; - *size = rem; - return SZ_OK; -} - -static SRes LookToRead_Seek(void *pp, Int64 *pos, ESzSeek origin) -{ - CLookToRead *p = (CLookToRead *)pp; - p->pos = p->size = 0; - return p->realStream->Seek(p->realStream, pos, origin); -} - -void LookToRead_CreateVTable(CLookToRead *p, int lookahead) -{ - p->s.Look = lookahead ? - LookToRead_Look_Lookahead : - LookToRead_Look_Exact; - p->s.Skip = LookToRead_Skip; - p->s.Read = LookToRead_Read; - p->s.Seek = LookToRead_Seek; -} - -void LookToRead_Init(CLookToRead *p) -{ - p->pos = p->size = 0; -} - -static SRes SecToLook_Read(void *pp, void *buf, size_t *size) -{ - CSecToLook *p = (CSecToLook *)pp; - return LookInStream_LookRead(p->realStream, buf, size); -} - -void SecToLook_CreateVTable(CSecToLook *p) -{ - p->s.Read = SecToLook_Read; -} - -static SRes SecToRead_Read(void *pp, void *buf, size_t *size) -{ - CSecToRead *p = (CSecToRead *)pp; - return p->realStream->Read(p->realStream, buf, size); -} - -void SecToRead_CreateVTable(CSecToRead *p) -{ - p->s.Read = SecToRead_Read; -} diff --git a/Alloc.c b/Alloc.c deleted file mode 100644 index 09a54c3..0000000 --- a/Alloc.c +++ /dev/null @@ -1,127 +0,0 @@ -/* Alloc.c -- Memory allocation functions -2008-09-24 : Igor Pavlov : Public domain */ - -#define _FILE_OFFSET_BITS 64 - -#ifdef _WIN32 -#include -#endif -#include - -#include "Alloc.h" - -/* #define _SZ_ALLOC_DEBUG */ - -/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ -#ifdef _SZ_ALLOC_DEBUG -#include -int g_allocCount = 0; -int g_allocCountMid = 0; -int g_allocCountBig = 0; -#endif - -void *MyAlloc(size_t size) -{ - if (size == 0) - return 0; - #ifdef _SZ_ALLOC_DEBUG - { - void *p = malloc(size); - fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p); - return p; - } - #else - return malloc(size); - #endif -} - -void MyFree(void *address) -{ - #ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address); - #endif - free(address); -} - -#ifdef _WIN32 - -void *MidAlloc(size_t size) -{ - if (size == 0) - return 0; - #ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++); - #endif - return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); -} - -void MidFree(void *address) -{ - #ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid); - #endif - if (address == 0) - return; - VirtualFree(address, 0, MEM_RELEASE); -} - -#ifndef MEM_LARGE_PAGES -#undef _7ZIP_LARGE_PAGES -#endif - -#ifdef _7ZIP_LARGE_PAGES -SIZE_T g_LargePageSize = 0; -typedef SIZE_T (WINAPI *GetLargePageMinimumP)(); -#endif - -void SetLargePageSize() -{ - #ifdef _7ZIP_LARGE_PAGES - SIZE_T size = 0; - GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) - GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); - if (largePageMinimum == 0) - return; - size = largePageMinimum(); - if (size == 0 || (size & (size - 1)) != 0) - return; - g_LargePageSize = size; - #endif -} - - -void *BigAlloc(size_t size) -{ - if (size == 0) - return 0; - #ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++); - #endif - - #ifdef _7ZIP_LARGE_PAGES - if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18)) - { - void *res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)), - MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); - if (res != 0) - return res; - } - #endif - return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); -} - -void BigFree(void *address) -{ - #ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig); - #endif - - if (address == 0) - return; - VirtualFree(address, 0, MEM_RELEASE); -} - -#endif diff --git a/Alloc.h b/Alloc.h deleted file mode 100644 index 6b3f034..0000000 --- a/Alloc.h +++ /dev/null @@ -1,38 +0,0 @@ -/* Alloc.h -- Memory allocation functions -2009-02-07 : Igor Pavlov : Public domain */ - -#ifndef __COMMON_ALLOC_H -#define __COMMON_ALLOC_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -void *MyAlloc(size_t size); -void MyFree(void *address); - -#ifdef _WIN32 - -void SetLargePageSize(); - -void *MidAlloc(size_t size); -void MidFree(void *address); -void *BigAlloc(size_t size); -void BigFree(void *address); - -#else - -#define MidAlloc(size) MyAlloc(size) -#define MidFree(address) MyFree(address) -#define BigAlloc(size) MyAlloc(size) -#define BigFree(address) MyFree(address) - -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/ChangeLog b/ChangeLog index 1f19301..9214436 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2013-02-18 Antonio Diaz Diaz + + * Version 1.4-rc1 released. + * main.c: Added new option '-f, --force'. + * main.c: Added new option '-F, --recompress'. + * main.c: Added new option '-k, --keep'. + * main.c: Added new option '-o, --output'. + * main.c: Accept more than one file in command line. + * Decompression time has been reduced by 5%. + * main.c: '--test' no more needs '/dev/null'. + * configure: 'datadir' renamed to 'datarootdir'. + * Makefile.in: Added new target 'install-as-lzip'. + * Makefile.in: Added new target 'install-bin'. + 2012-01-03 Antonio Diaz Diaz * Version 1.3 released. @@ -27,7 +41,7 @@ * Using LZMA SDK 9.10 (public domain) from Igor Pavlov. -Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. +Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff --git a/INSTALL b/INSTALL index ad5a756..feadbf6 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C compiler. -I use gcc 4.3.5 and 3.3.6, but the code should compile with any +I use gcc 4.7.2 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -32,6 +32,13 @@ the main archive. 5. Type 'make install' to install the program and any data files and documentation. + You can install only the program, the info manual or the man page + typing 'make install-bin', 'make install-info' or 'make install-man' + respectively. + +5a. Type 'make install-as-lzip' to install the program and any data + files and documentation, and link the program to the name 'lzip'. + Another way ----------- @@ -50,7 +57,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. +Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/LzFind.c b/LzFind.c index a2a6636..06d1c2c 100644 --- a/LzFind.c +++ b/LzFind.c @@ -1,756 +1,628 @@ -/* LzFind.c -- Match finder for LZ algorithms -2009-04-22 : Igor Pavlov : Public domain */ - -#define _FILE_OFFSET_BITS 64 - -#include -#include - -#include "pdlzip.h" -#include "LzFind.h" -#include "LzHash.h" - -#define kEmptyHashValue 0 -#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) -#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ -#define kNormalizeMask (~(kNormalizeStepMin - 1)) -#define kMaxHistorySize ((UInt32)3 << 30) - -#define kStartMaxLen 3 - -static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc) -{ - if (!p->directInput) - { - alloc->Free(alloc, p->bufferBase); - p->bufferBase = 0; - } -} - -/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ - -static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc) -{ - UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; - if (p->directInput) - { - p->blockSize = blockSize; - return 1; - } - if (p->bufferBase == 0 || p->blockSize != blockSize) - { - LzInWindow_Free(p, alloc); - p->blockSize = blockSize; - p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize); - } - return (p->bufferBase != 0); -} - -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } -Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; } - -UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } - -void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) -{ - p->posLimit -= subValue; - p->pos -= subValue; - p->streamPos -= subValue; -} - -static void MatchFinder_ReadBlock(CMatchFinder *p) -{ - if (p->streamEndWasReached || p->result != SZ_OK) - return; - if (p->directInput) - { - UInt32 curSize = 0xFFFFFFFF - p->streamPos; - if (curSize > p->directInputRem) - curSize = (UInt32)p->directInputRem; - p->directInputRem -= curSize; - p->streamPos += curSize; - if (p->directInputRem == 0) - p->streamEndWasReached = 1; - return; - } - for (;;) - { - Byte *dest = p->buffer + (p->streamPos - p->pos); - size_t size = (p->bufferBase + p->blockSize - dest); - if (size == 0) - return; - p->result = p->stream->Read(p->stream, dest, &size); - if (p->result != SZ_OK) - return; - if (size == 0) - { - p->streamEndWasReached = 1; - return; - } - CRC32_update_buf( &p->crc, dest, size ); - p->streamPos += (UInt32)size; - if (p->streamPos - p->pos > p->keepSizeAfter) - return; - } -} - -void MatchFinder_MoveBlock(CMatchFinder *p) -{ - memmove(p->bufferBase, - p->buffer - p->keepSizeBefore, - (size_t)(p->streamPos - p->pos + p->keepSizeBefore)); - p->buffer = p->bufferBase + p->keepSizeBefore; -} - -int MatchFinder_NeedMove(CMatchFinder *p) -{ - if (p->directInput) - return 0; - /* if (p->streamEndWasReached) return 0; */ - return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); -} - -void MatchFinder_ReadIfRequired(CMatchFinder *p) -{ - if (p->streamEndWasReached) - return; - if (p->keepSizeAfter >= p->streamPos - p->pos) - MatchFinder_ReadBlock(p); -} - -static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) -{ - if (MatchFinder_NeedMove(p)) - MatchFinder_MoveBlock(p); - MatchFinder_ReadBlock(p); -} - -static void MatchFinder_SetDefaultSettings(CMatchFinder *p) -{ - p->cutValue = 32; - p->btMode = 1; - p->numHashBytes = 4; - p->bigHash = 0; -} - - -void MatchFinder_Construct(CMatchFinder *p) - { - p->bufferBase = 0; - p->directInput = 0; - p->hash = 0; - MatchFinder_SetDefaultSettings(p); - p->crc = 0xFFFFFFFFU; - } - -static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc) -{ - alloc->Free(alloc, p->hash); - p->hash = 0; -} - -void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc) -{ - MatchFinder_FreeThisClassMemory(p, alloc); - LzInWindow_Free(p, alloc); -} - -static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc) -{ - size_t sizeInBytes = (size_t)num * sizeof(CLzRef); - if (sizeInBytes / sizeof(CLzRef) != num) - return 0; - return (CLzRef *)alloc->Alloc(alloc, sizeInBytes); -} - -int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAlloc *alloc) -{ - UInt32 sizeReserv; - if (historySize > kMaxHistorySize) - { - MatchFinder_Free(p, alloc); - return 0; - } - sizeReserv = historySize >> 1; - if (historySize > ((UInt32)2 << 30)) - sizeReserv = historySize >> 2; - sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); - - p->keepSizeBefore = historySize + keepAddBufferBefore + 1; - p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; - /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ - if (LzInWindow_Create(p, sizeReserv, alloc)) - { - UInt32 newCyclicBufferSize = historySize + 1; - UInt32 hs; - p->matchMaxLen = matchMaxLen; - { - p->fixedHashSize = 0; - if (p->numHashBytes == 2) - hs = (1 << 16) - 1; - else - { - hs = historySize - 1; - hs |= (hs >> 1); - hs |= (hs >> 2); - hs |= (hs >> 4); - hs |= (hs >> 8); - hs >>= 1; - hs |= 0xFFFF; /* don't change it! It's required for Deflate */ - if (hs > (1 << 24)) - { - if (p->numHashBytes == 3) - hs = (1 << 24) - 1; - else - hs >>= 1; - } - } - p->hashMask = hs; - hs++; - if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; - if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; - if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; - hs += p->fixedHashSize; - } - - { - UInt32 prevSize = p->hashSizeSum + p->numSons; - UInt32 newSize; - p->historySize = historySize; - p->hashSizeSum = hs; - p->cyclicBufferSize = newCyclicBufferSize; - p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize); - newSize = p->hashSizeSum + p->numSons; - if (p->hash != 0 && prevSize == newSize) - return 1; - MatchFinder_FreeThisClassMemory(p, alloc); - p->hash = AllocRefs(newSize, alloc); - if (p->hash != 0) - { - p->son = p->hash + p->hashSizeSum; - return 1; - } - } - } - MatchFinder_Free(p, alloc); - return 0; -} - -static void MatchFinder_SetLimits(CMatchFinder *p) -{ - UInt32 limit = kMaxValForNormalize - p->pos; - UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; - if (limit2 < limit) - limit = limit2; - limit2 = p->streamPos - p->pos; - if (limit2 <= p->keepSizeAfter) - { - if (limit2 > 0) - limit2 = 1; - } - else - limit2 -= p->keepSizeAfter; - if (limit2 < limit) - limit = limit2; - { - UInt32 lenLimit = p->streamPos - p->pos; - if (lenLimit > p->matchMaxLen) - lenLimit = p->matchMaxLen; - p->lenLimit = lenLimit; - } - p->posLimit = p->pos + limit; -} - -void MatchFinder_Init(CMatchFinder *p) -{ - UInt32 i; - for (i = 0; i < p->hashSizeSum; i++) - p->hash[i] = kEmptyHashValue; - p->cyclicBufferPos = 0; - p->buffer = p->bufferBase; - p->pos = p->streamPos = p->cyclicBufferSize; - p->result = SZ_OK; - p->streamEndWasReached = 0; - MatchFinder_ReadBlock(p); - MatchFinder_SetLimits(p); -} - -static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) -{ - return (p->pos - p->historySize - 1) & kNormalizeMask; -} - -void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems) -{ - UInt32 i; - for (i = 0; i < numItems; i++) - { - UInt32 value = items[i]; - if (value <= subValue) - value = kEmptyHashValue; - else - value -= subValue; - items[i] = value; - } -} - -static void MatchFinder_Normalize(CMatchFinder *p) -{ - UInt32 subValue = MatchFinder_GetSubValue(p); - MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons); - MatchFinder_ReduceOffsets(p, subValue); -} - -static void MatchFinder_CheckLimits(CMatchFinder *p) -{ - if (p->pos == kMaxValForNormalize) - MatchFinder_Normalize(p); - if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) - MatchFinder_CheckAndMoveAndRead(p); - if (p->cyclicBufferPos == p->cyclicBufferSize) - p->cyclicBufferPos = 0; - MatchFinder_SetLimits(p); -} - -static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32 *distances, UInt32 maxLen) -{ - son[_cyclicBufferPos] = curMatch; - for (;;) - { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - return distances; - { - const Byte *pb = cur - delta; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; - if (pb[maxLen] == cur[maxLen] && *pb == *cur) - { - UInt32 len = 0; - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) - { - *distances++ = maxLen = len; - *distances++ = delta - 1; - if (len == lenLimit) - return distances; - } - } - } - } -} - -UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32 *distances, UInt32 maxLen) -{ - CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; - CLzRef *ptr1 = son + (_cyclicBufferPos << 1); - UInt32 len0 = 0, len1 = 0; - for (;;) - { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return distances; - } - { - CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte *pb = cur - delta; - UInt32 len = (len0 < len1 ? len0 : len1); - if (pb[len] == cur[len]) - { - if (++len != lenLimit && pb[len] == cur[len]) - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) - { - *distances++ = maxLen = len; - *distances++ = delta - 1; - if (len == lenLimit) - { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - return distances; - } - } - } - if (pb[len] < cur[len]) - { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } - else - { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } -} - -static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) -{ - CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; - CLzRef *ptr1 = son + (_cyclicBufferPos << 1); - UInt32 len0 = 0, len1 = 0; - for (;;) - { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return; - } - { - CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte *pb = cur - delta; - UInt32 len = (len0 < len1 ? len0 : len1); - if (pb[len] == cur[len]) - { - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - { - if (len == lenLimit) - { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - return; - } - } - } - if (pb[len] < cur[len]) - { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } - else - { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } -} - -#define MOVE_POS \ - ++p->cyclicBufferPos; \ - p->buffer++; \ - if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); - -#define MOVE_POS_RET MOVE_POS return offset; - -static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } - -#define GET_MATCHES_HEADER2(minLen, ret_op) \ - UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \ - lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ - cur = p->buffer; - -#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) -#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) - -#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue - -#define GET_MATCHES_FOOTER(offset, maxLen) \ - offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \ - distances + offset, maxLen) - distances); MOVE_POS_RET; - -#define SKIP_FOOTER \ - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; - -static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 offset; - GET_MATCHES_HEADER(2) - HASH2_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 1) -} - -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 offset; - GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 2) -} - -static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 hash2Value, delta2, maxLen, offset; - GET_MATCHES_HEADER(3) - - HASH3_CALC; - - delta2 = p->pos - p->hash[hash2Value]; - curMatch = p->hash[kFix3HashSize + hashValue]; - - p->hash[hash2Value] = - p->hash[kFix3HashSize + hashValue] = p->pos; - - - maxLen = 2; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) - { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) - break; - distances[0] = maxLen; - distances[1] = delta2 - 1; - offset = 2; - if (maxLen == lenLimit) - { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - GET_MATCHES_FOOTER(offset, maxLen) -} - -static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; - GET_MATCHES_HEADER(4) - - HASH4_CALC; - - delta2 = p->pos - p->hash[ hash2Value]; - delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; - curMatch = p->hash[kFix4HashSize + hashValue]; - - p->hash[ hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; - - maxLen = 1; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) - { - distances[0] = maxLen = 2; - distances[1] = delta2 - 1; - offset = 2; - } - if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) - { - maxLen = 3; - distances[offset + 1] = delta3 - 1; - offset += 2; - delta2 = delta3; - } - if (offset != 0) - { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) - break; - distances[offset - 2] = maxLen; - if (maxLen == lenLimit) - { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - if (maxLen < 3) - maxLen = 3; - GET_MATCHES_FOOTER(offset, maxLen) -} - -static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; - GET_MATCHES_HEADER(4) - - HASH4_CALC; - - delta2 = p->pos - p->hash[ hash2Value]; - delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; - curMatch = p->hash[kFix4HashSize + hashValue]; - - p->hash[ hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; - - maxLen = 1; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) - { - distances[0] = maxLen = 2; - distances[1] = delta2 - 1; - offset = 2; - } - if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) - { - maxLen = 3; - distances[offset + 1] = delta3 - 1; - offset += 2; - delta2 = delta3; - } - if (offset != 0) - { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) - break; - distances[offset - 2] = maxLen; - if (maxLen == lenLimit) - { - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; - } - } - if (maxLen < 3) - maxLen = 3; - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET -} - -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 offset; - GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances, 2) - (distances)); - MOVE_POS_RET -} - -static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - SKIP_HEADER(2) - HASH2_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - SKIP_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 hash2Value; - SKIP_HEADER(3) - HASH3_CALC; - curMatch = p->hash[kFix3HashSize + hashValue]; - p->hash[hash2Value] = - p->hash[kFix3HashSize + hashValue] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 hash2Value, hash3Value; - SKIP_HEADER(4) - HASH4_CALC; - curMatch = p->hash[kFix4HashSize + hashValue]; - p->hash[ hash2Value] = - p->hash[kFix3HashSize + hash3Value] = p->pos; - p->hash[kFix4HashSize + hashValue] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 hash2Value, hash3Value; - SKIP_HEADER(4) - HASH4_CALC; - curMatch = p->hash[kFix4HashSize + hashValue]; - p->hash[ hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); -} - -void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - SKIP_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); -} - -void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) -{ - vTable->Init = (Mf_Init_Func)MatchFinder_Init; - vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; - if (!p->btMode) - { - vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; - } - else if (p->numHashBytes == 2) - { - vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; - } - else if (p->numHashBytes == 3) - { - vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; - } - else - { - vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; - } -} +/* LzFind.c -- Match finder for LZ algorithms +2009-04-22 : Igor Pavlov : Public domain */ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include + +#include "clzip.h" +#include "LzFind.h" + +#define kHash2Size (1 << 10) +#define kHash3Size (1 << 16) +#define kHash4Size (1 << 20) + +#define kFix3HashSize (kHash2Size) +#define kFix4HashSize (kHash2Size + kHash3Size) + +#define HASH2_CALC hashValue = cur[0] | ((uint32_t)cur[1] << 8); + +#define HASH3_CALC { \ + uint32_t temp = crc32[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hashValue = (temp ^ ((uint32_t)cur[2] << 8)) & p->hashMask; } + +#define HASH4_CALC { \ + uint32_t temp = crc32[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((uint32_t)cur[2] << 8)) & (kHash3Size - 1); \ + hashValue = (temp ^ ((uint32_t)cur[2] << 8) ^ (crc32[cur[3]] << 5)) & p->hashMask; } + +#define kEmptyHashValue 0 +#define kMaxValForNormalize ((uint32_t)0xFFFFFFFF) +#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ +#define kNormalizeMask (~(kNormalizeStepMin - 1)) + +#define kStartMaxLen 3 + + +static void Mf_ReadBlock(CMatchFinder *p) +{ + if (p->streamEndWasReached || p->result != SZ_OK) + return; + for (;;) + { + uint8_t * const dest = p->buffer + (p->streamPos - p->pos); + const int size = (p->bufferBase + p->blockSize - dest); + int rd; + if (size == 0) + return; + rd = readblock( p->infd, dest, size ); + if (rd != size && errno) + { p->result = SZ_ERROR_READ; return; } + if (rd == 0) + { + p->streamEndWasReached = true; + return; + } + CRC32_update_buf( &p->crc, dest, rd ); + p->streamPos += rd; + if (p->streamPos - p->pos > p->keepSizeAfter) + return; + } +} + + +static void Mf_CheckAndMoveAndRead(CMatchFinder *p) +{ + if ((uint32_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter) + { + memmove(p->bufferBase, + p->buffer - p->keepSizeBefore, + p->streamPos - p->pos + p->keepSizeBefore); + p->buffer = p->bufferBase + p->keepSizeBefore; + } + Mf_ReadBlock(p); +} + + +void Mf_Free(CMatchFinder *p) +{ + free(p->hash); + p->hash = 0; + free(p->bufferBase); + p->bufferBase = 0; +} + +static CLzRef* AllocRefs(uint32_t num) +{ + uint32_t sizeInBytes = num * sizeof(CLzRef); + if (sizeInBytes / sizeof(CLzRef) != num) + return 0; + return (CLzRef *)malloc(sizeInBytes); +} + +static void Mf_SetLimits(CMatchFinder *p) +{ + uint32_t limit = kMaxValForNormalize - p->pos; + uint32_t limit2 = p->cyclicBufferSize - p->cyclicBufferPos; + if (limit2 < limit) + limit = limit2; + limit2 = p->streamPos - p->pos; + if (limit2 <= p->keepSizeAfter) + { + if (limit2 > 0) + limit2 = 1; + } + else + limit2 -= p->keepSizeAfter; + if (limit2 < limit) + limit = limit2; + { + uint32_t lenLimit = p->streamPos - p->pos; + if (lenLimit > p->matchMaxLen) + lenLimit = p->matchMaxLen; + p->lenLimit = lenLimit; + } + p->posLimit = p->pos + limit; +} + + +int Mf_Init(CMatchFinder *p, const int ifd, const int mc, uint32_t historySize, + uint32_t keepAddBufferBefore, uint32_t matchMaxLen, uint32_t keepAddBufferAfter) +{ + const uint32_t sizeReserv = ( historySize >> 1 ) + + (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); + + p->hash = 0; + p->cutValue = mc; + p->infd = ifd; + p->btMode = true; + p->numHashBytes = 4; + p->crc = 0xFFFFFFFFU; + p->keepSizeBefore = historySize + keepAddBufferBefore + 1; + p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; + /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ + /* keepSizeBefore + keepSizeAfter + sizeReserv must be < 4G) */ + p->blockSize = p->keepSizeBefore + p->keepSizeAfter + sizeReserv; + p->buffer = p->bufferBase = (uint8_t *)malloc(p->blockSize); + if( p->bufferBase ) + { + uint32_t newCyclicBufferSize = historySize + 1; + uint32_t hs; + p->matchMaxLen = matchMaxLen; + { + if (p->numHashBytes == 2) + hs = (1 << 16) - 1; + else + { + hs = historySize - 1; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + hs >>= 1; + hs |= 0xFFFF; /* don't change it! It's required for Deflate */ + if (hs > (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + else + hs >>= 1; + } + } + p->hashMask = hs; + hs++; + if (p->numHashBytes > 2) hs += kHash2Size; + if (p->numHashBytes > 3) hs += kHash3Size; + if (p->numHashBytes > 4) hs += kHash4Size; + } + + { + uint32_t newSize; + p->historySize = historySize; + p->hashSizeSum = hs; + p->cyclicBufferSize = newCyclicBufferSize; + p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize); + newSize = p->hashSizeSum + p->numSons; + p->hash = AllocRefs(newSize); + if (p->hash != 0) + { + uint32_t i; + p->son = p->hash + p->hashSizeSum; + for (i = 0; i < p->hashSizeSum; i++) + p->hash[i] = kEmptyHashValue; + p->cyclicBufferPos = 0; + p->pos = p->streamPos = p->cyclicBufferSize; + p->result = SZ_OK; + p->streamEndWasReached = false; + Mf_ReadBlock(p); + Mf_SetLimits(p); + return 1; + } + } + } + Mf_Free(p); + return 0; +} + +static void Mf_Normalize3(uint32_t subValue, CLzRef *items, uint32_t numItems) +{ + uint32_t i; + for (i = 0; i < numItems; i++) + { + uint32_t value = items[i]; + if (value <= subValue) + value = kEmptyHashValue; + else + value -= subValue; + items[i] = value; + } +} + +static void Mf_Normalize(CMatchFinder *p) +{ + uint32_t subValue = (p->pos - p->historySize - 1) & kNormalizeMask; + Mf_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons); + p->posLimit -= subValue; + p->pos -= subValue; + p->streamPos -= subValue; +} + +static void Mf_CheckLimits(CMatchFinder *p) +{ + if (p->pos == kMaxValForNormalize) + Mf_Normalize(p); + if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) + Mf_CheckAndMoveAndRead(p); + if (p->cyclicBufferPos == p->cyclicBufferSize) + p->cyclicBufferPos = 0; + Mf_SetLimits(p); +} + +static uint32_t * Hc_GetMatchesSpec(uint32_t lenLimit, uint32_t curMatch, uint32_t pos, const uint8_t *cur, CLzRef *son, + uint32_t _cyclicBufferPos, uint32_t _cyclicBufferSize, uint32_t cutValue, + uint32_t *distances, uint32_t maxLen) +{ + son[_cyclicBufferPos] = curMatch; + for (;;) + { + uint32_t delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + return distances; + { + const uint8_t *pb = cur - delta; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + if (pb[maxLen] == cur[maxLen] && *pb == *cur) + { + uint32_t len = 0; + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + *distances++ = maxLen = len; + *distances++ = delta - 1; + if (len == lenLimit) + return distances; + } + } + } + } +} + + +static uint32_t * GetMatchesSpec1( uint32_t lenLimit, uint32_t curMatch, + uint32_t pos, const uint8_t *cur, CLzRef *son, + uint32_t _cyclicBufferPos, uint32_t _cyclicBufferSize, uint32_t cutValue, + uint32_t *distances, uint32_t maxLen ) +{ + CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + (_cyclicBufferPos << 1); + uint32_t len0 = 0, len1 = 0; + for (;;) + { + uint32_t delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + return distances; + } + { + CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const uint8_t *pb = cur - delta; + uint32_t len = (len0 < len1 ? len0 : len1); + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + *distances++ = maxLen = len; + *distances++ = delta - 1; + if (len == lenLimit) + { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return distances; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } + else + { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } +} + +static void SkipMatchesSpec(uint32_t lenLimit, uint32_t curMatch, uint32_t pos, const uint8_t *cur, CLzRef *son, + uint32_t _cyclicBufferPos, uint32_t _cyclicBufferSize, uint32_t cutValue) +{ + CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + (_cyclicBufferPos << 1); + uint32_t len0 = 0, len1 = 0; + for (;;) + { + uint32_t delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + return; + } + { + CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const uint8_t *pb = cur - delta; + uint32_t len = (len0 < len1 ? len0 : len1); + if (pb[len] == cur[len]) + { + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + { + if (len == lenLimit) + { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } + else + { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } +} + +#define MOVE_POS \ + ++p->cyclicBufferPos; \ + p->buffer++; \ + if (++p->pos == p->posLimit) Mf_CheckLimits(p); + +#define MOVE_POS_RET MOVE_POS return offset; + +static void Mf_MovePos(CMatchFinder *p) { MOVE_POS; } + +#define GET_MATCHES_HEADER2(minLen, ret_op) \ + uint32_t lenLimit; uint32_t hashValue; const uint8_t *cur; uint32_t curMatch; \ + lenLimit = p->lenLimit; { if (lenLimit < minLen) { Mf_MovePos(p); ret_op; }} \ + cur = p->buffer; + +#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) +#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) + +#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue + +#define GET_MATCHES_FOOTER(offset, maxLen) \ + offset = (uint32_t)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \ + distances + offset, maxLen) - distances); MOVE_POS_RET; + +#define SKIP_FOOTER \ + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; + +static uint32_t Bt2_MatchFinder_GetMatches(CMatchFinder *p, uint32_t *distances) +{ + uint32_t offset; + GET_MATCHES_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + offset = 0; + GET_MATCHES_FOOTER(offset, 1) +} + +static uint32_t Bt3_MatchFinder_GetMatches(CMatchFinder *p, uint32_t *distances) +{ + uint32_t hash2Value, delta2, maxLen, offset; + GET_MATCHES_HEADER(3) + + HASH3_CALC; + + delta2 = p->pos - p->hash[hash2Value]; + curMatch = p->hash[kFix3HashSize + hashValue]; + + p->hash[hash2Value] = + p->hash[kFix3HashSize + hashValue] = p->pos; + + + maxLen = 2; + offset = 0; + if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) + { + for (; maxLen != lenLimit; maxLen++) + if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + break; + distances[0] = maxLen; + distances[1] = delta2 - 1; + offset = 2; + if (maxLen == lenLimit) + { + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + MOVE_POS_RET; + } + } + GET_MATCHES_FOOTER(offset, maxLen) +} + +static uint32_t Bt4_MatchFinder_GetMatches(CMatchFinder *p, uint32_t *distances) +{ + uint32_t hash2Value, hash3Value, delta2, delta3, maxLen, offset; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + delta2 = p->pos - p->hash[ hash2Value]; + delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; + curMatch = p->hash[kFix4HashSize + hashValue]; + + p->hash[ hash2Value] = + p->hash[kFix3HashSize + hash3Value] = + p->hash[kFix4HashSize + hashValue] = p->pos; + + maxLen = 1; + offset = 0; + if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) + { + distances[0] = maxLen = 2; + distances[1] = delta2 - 1; + offset = 2; + } + if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) + { + maxLen = 3; + distances[offset + 1] = delta3 - 1; + offset += 2; + delta2 = delta3; + } + if (offset != 0) + { + for (; maxLen != lenLimit; maxLen++) + if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + break; + distances[offset - 2] = maxLen; + if (maxLen == lenLimit) + { + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + MOVE_POS_RET; + } + } + if (maxLen < 3) + maxLen = 3; + GET_MATCHES_FOOTER(offset, maxLen) +} + +static uint32_t Hc4_MatchFinder_GetMatches(CMatchFinder *p, uint32_t *distances) +{ + uint32_t hash2Value, hash3Value, delta2, delta3, maxLen, offset; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + delta2 = p->pos - p->hash[ hash2Value]; + delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; + curMatch = p->hash[kFix4HashSize + hashValue]; + + p->hash[ hash2Value] = + p->hash[kFix3HashSize + hash3Value] = + p->hash[kFix4HashSize + hashValue] = p->pos; + + maxLen = 1; + offset = 0; + if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) + { + distances[0] = maxLen = 2; + distances[1] = delta2 - 1; + offset = 2; + } + if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) + { + maxLen = 3; + distances[offset + 1] = delta3 - 1; + offset += 2; + delta2 = delta3; + } + if (offset != 0) + { + for (; maxLen != lenLimit; maxLen++) + if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + break; + distances[offset - 2] = maxLen; + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET; + } + } + if (maxLen < 3) + maxLen = 3; + offset = (uint32_t)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), + distances + offset, maxLen) - (distances)); + MOVE_POS_RET +} + + +static void Bt2_MatchFinder_Skip(CMatchFinder *p, uint32_t num) +{ + do + { + SKIP_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + + +static void Bt3_MatchFinder_Skip(CMatchFinder *p, uint32_t num) +{ + do + { + uint32_t hash2Value; + SKIP_HEADER(3) + HASH3_CALC; + curMatch = p->hash[kFix3HashSize + hashValue]; + p->hash[hash2Value] = + p->hash[kFix3HashSize + hashValue] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +static void Bt4_MatchFinder_Skip(CMatchFinder *p, uint32_t num) +{ + do + { + uint32_t hash2Value, hash3Value; + SKIP_HEADER(4) + HASH4_CALC; + curMatch = p->hash[kFix4HashSize + hashValue]; + p->hash[ hash2Value] = + p->hash[kFix3HashSize + hash3Value] = p->pos; + p->hash[kFix4HashSize + hashValue] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +static void Hc4_MatchFinder_Skip(CMatchFinder *p, uint32_t num) +{ + do + { + uint32_t hash2Value, hash3Value; + SKIP_HEADER(4) + HASH4_CALC; + curMatch = p->hash[kFix4HashSize + hashValue]; + p->hash[ hash2Value] = + p->hash[kFix3HashSize + hash3Value] = + p->hash[kFix4HashSize + hashValue] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } + while (--num != 0); +} + + +void Mf_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) +{ + if (!p->btMode) + { + vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + } + else if (p->numHashBytes == 2) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; + } + else if (p->numHashBytes == 3) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; + } + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + } +} diff --git a/LzFind.h b/LzFind.h index ccae302..64b5b2e 100644 --- a/LzFind.h +++ b/LzFind.h @@ -1,115 +1,74 @@ -/* LzFind.h -- Match finder for LZ algorithms -2009-04-22 : Igor Pavlov : Public domain */ - -#ifndef __LZ_FIND_H -#define __LZ_FIND_H - -#include "Types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef UInt32 CLzRef; - -typedef struct _CMatchFinder -{ - Byte *buffer; - UInt32 pos; - UInt32 posLimit; - UInt32 streamPos; - UInt32 lenLimit; - - UInt32 cyclicBufferPos; - UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ - - UInt32 matchMaxLen; - CLzRef *hash; - CLzRef *son; - UInt32 hashMask; - UInt32 cutValue; - - Byte *bufferBase; - ISeqInStream *stream; - int streamEndWasReached; - - UInt32 blockSize; - UInt32 keepSizeBefore; - UInt32 keepSizeAfter; - - UInt32 numHashBytes; - int directInput; - size_t directInputRem; - int btMode; - int bigHash; - UInt32 historySize; - UInt32 fixedHashSize; - UInt32 hashSizeSum; - UInt32 numSons; - SRes result; - uint32_t crc; -} CMatchFinder; - -#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) -#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)]) - -#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) - -int MatchFinder_NeedMove(CMatchFinder *p); -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); -void MatchFinder_MoveBlock(CMatchFinder *p); -void MatchFinder_ReadIfRequired(CMatchFinder *p); - -void MatchFinder_Construct(CMatchFinder *p); - -/* Conditions: - historySize <= 3 GB - keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB -*/ -int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAlloc *alloc); -void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc); -void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems); -void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); - -UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32 *distances, UInt32 maxLen); - -/* -Conditions: - Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. - Mf_GetPointerToCurrentPos_Func's result must be used only before any other function -*/ - -typedef void (*Mf_Init_Func)(void *object); -typedef Byte (*Mf_GetIndexByte_Func)(void *object, Int32 index); -typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); -typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); -typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); -typedef void (*Mf_Skip_Func)(void *object, UInt32); - -typedef struct _IMatchFinder -{ - Mf_Init_Func Init; - Mf_GetIndexByte_Func GetIndexByte; - Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; - Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; - Mf_GetMatches_Func GetMatches; - Mf_Skip_Func Skip; -} IMatchFinder; - -void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); - -void MatchFinder_Init(CMatchFinder *p); -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); -void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); -void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); - -#ifdef __cplusplus -} -#endif - -#endif +/* LzFind.h -- Match finder for LZ algorithms +2009-04-22 : Igor Pavlov : Public domain */ + +typedef uint32_t CLzRef; + +typedef struct +{ + uint8_t *bufferBase; + uint8_t *buffer; + CLzRef *hash; + CLzRef *son; + uint32_t pos; + uint32_t posLimit; + uint32_t streamPos; + uint32_t lenLimit; + + uint32_t cyclicBufferPos; + uint32_t cyclicBufferSize; /* it must be = (historySize + 1) */ + + uint32_t matchMaxLen; + uint32_t hashMask; + uint32_t cutValue; + + uint32_t blockSize; + uint32_t keepSizeBefore; + uint32_t keepSizeAfter; + + uint32_t numHashBytes; + uint32_t historySize; + uint32_t hashSizeSum; + uint32_t numSons; + int infd; + int result; + uint32_t crc; + bool btMode; + bool streamEndWasReached; +} CMatchFinder; + + +/* Conditions: + historySize <= 3 GB + keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB +*/ +int Mf_Init(CMatchFinder *p, const int ifd, const int mc, uint32_t historySize, + uint32_t keepAddBufferBefore, uint32_t matchMaxLen, uint32_t keepAddBufferAfter); + +void Mf_Free(CMatchFinder *p); + + +/* +Conditions: + Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. + Mf_GetPointerToCurrentPos_Func's result must be used only before any other function +*/ + +typedef uint32_t (*Mf_GetMatches_Func)(void *object, uint32_t *distances); +typedef void (*Mf_Skip_Func)(void *object, uint32_t); + +typedef struct _IMatchFinder +{ + Mf_GetMatches_Func GetMatches; + Mf_Skip_Func Skip; +} IMatchFinder; + +void Mf_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); + +static inline uint32_t Mf_GetNumAvailableBytes(CMatchFinder *p) + { return p->streamPos - p->pos; } + +static inline uint8_t Mf_GetIndexByte(CMatchFinder *p, int index) + { return p->buffer[index]; } + +static inline uint8_t * Mf_GetPointerToCurrentPos(CMatchFinder *p) + { return p->buffer; } diff --git a/LzHash.h b/LzHash.h deleted file mode 100644 index 4beaec7..0000000 --- a/LzHash.h +++ /dev/null @@ -1,54 +0,0 @@ -/* LzHash.h -- HASH functions for LZ algorithms -2009-02-07 : Igor Pavlov : Public domain */ - -#ifndef __LZ_HASH_H -#define __LZ_HASH_H - -#define kHash2Size (1 << 10) -#define kHash3Size (1 << 16) -#define kHash4Size (1 << 20) - -#define kFix3HashSize (kHash2Size) -#define kFix4HashSize (kHash2Size + kHash3Size) -#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) - -#define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8); - -#define HASH3_CALC { \ - UInt32 temp = crc32[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } - -#define HASH4_CALC { \ - UInt32 temp = crc32[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (crc32[cur[3]] << 5)) & p->hashMask; } - -#define HASH5_CALC { \ - UInt32 temp = crc32[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (crc32[cur[3]] << 5)); \ - hashValue = (hash4Value ^ (crc32[cur[4]] << 3)) & p->hashMask; \ - hash4Value &= (kHash4Size - 1); } - -/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ crc32[cur[2]]) & 0xFFFF; */ -#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ crc32[cur[1]]) & 0xFFFF; - - -#define MT_HASH2_CALC \ - hash2Value = (crc32[cur[0]] ^ cur[1]) & (kHash2Size - 1); - -#define MT_HASH3_CALC { \ - UInt32 temp = crc32[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } - -#define MT_HASH4_CALC { \ - UInt32 temp = crc32[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (crc32[cur[3]] << 5)) & (kHash4Size - 1); } - -#endif diff --git a/LzmaDec.c b/LzmaDec.c index b4b7e27..5ea279f 100644 --- a/LzmaDec.c +++ b/LzmaDec.c @@ -1,960 +1,938 @@ -/* LzmaDec.c -- LZMA Decoder -2009-09-20 : Igor Pavlov : Public domain */ - -#define _FILE_OFFSET_BITS 64 - -#include "LzmaDec.h" - -#include - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 - -#define RC_INIT_SIZE 5 - -#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } - -#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) -#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); -#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); -#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ - { UPDATE_0(p); i = (i + i); A0; } else \ - { UPDATE_1(p); i = (i + i) + 1; A1; } -#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;) - -#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); } -#define TREE_DECODE(probs, limit, i) \ - { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } - -/* #define _LZMA_SIZE_OPT */ - -#ifdef _LZMA_SIZE_OPT -#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) -#else -#define TREE_6_DECODE(probs, i) \ - { i = 1; \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - i -= 0x40; } -#endif - -#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } - -#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) -#define UPDATE_0_CHECK range = bound; -#define UPDATE_1_CHECK range -= bound; code -= bound; -#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ - { UPDATE_0_CHECK; i = (i + i); A0; } else \ - { UPDATE_1_CHECK; i = (i + i) + 1; A1; } -#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) -#define TREE_DECODE_CHECK(probs, limit, i) \ - { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } - - -#define kNumPosBitsMax 4 -#define kNumPosStatesMax (1 << kNumPosBitsMax) - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) -#define kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define LenChoice 0 -#define LenChoice2 (LenChoice + 1) -#define LenLow (LenChoice2 + 1) -#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) -#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) -#define kNumLenProbs (LenHigh + kLenNumHighSymbols) - - -#define kNumStates 12 -#define kNumLitStates 7 - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) - -#define kNumPosSlotBits 6 -#define kNumLenToPosStates 4 - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) - -#define kMatchMinLen 2 -#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) - -#define IsMatch 0 -#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) -#define IsRepG0 (IsRep + kNumStates) -#define IsRepG1 (IsRepG0 + kNumStates) -#define IsRepG2 (IsRepG1 + kNumStates) -#define IsRep0Long (IsRepG2 + kNumStates) -#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) -#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) -#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) -#define LenCoder (Align + kAlignTableSize) -#define RepLenCoder (LenCoder + kNumLenProbs) -#define Literal (RepLenCoder + kNumLenProbs) - -#define LZMA_BASE_SIZE 1846 -#define LZMA_LIT_SIZE 768 - -#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) - -#if Literal != LZMA_BASE_SIZE -StopCompilingDueBUG -#endif - -#define LZMA_DIC_MIN (1 << 12) - -/* First LZMA-symbol is always decoded. -And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization -Out: - Result: - SZ_OK - OK - SZ_ERROR_DATA - Error - p->remainLen: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : Flush marker - = kMatchSpecLenStart + 2 : State Init Marker -*/ - -static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit) -{ - CLzmaProb *probs = p->probs; - - unsigned state = p->state; - UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; - unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; - unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; - unsigned lc = p->prop.lc; - - Byte *dic = p->dic; - SizeT dicBufSize = p->dicBufSize; - SizeT dicPos = p->dicPos; - - UInt32 processedPos = p->processedPos; - UInt32 checkDicSize = p->checkDicSize; - unsigned len = 0; - - const Byte *buf = p->buf; - UInt32 range = p->range; - UInt32 code = p->code; - - do - { - CLzmaProb *prob; - UInt32 bound; - unsigned ttt; - unsigned posState = processedPos & pbMask; - - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; - IF_BIT_0(prob) - { - unsigned symbol; - UPDATE_0(prob); - prob = probs + Literal; - if (checkDicSize != 0 || processedPos != 0) - prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + - (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); - - if (state < kNumLitStates) - { - state -= (state < 4) ? state : 3; - symbol = 1; - do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100); - } - else - { - unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - unsigned offs = 0x100; - state -= (state < 10) ? 3 : 6; - symbol = 1; - do - { - unsigned bit; - CLzmaProb *probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) - } - while (symbol < 0x100); - } - dic[dicPos++] = (Byte)symbol; - processedPos++; - continue; - } - else - { - UPDATE_1(prob); - prob = probs + IsRep + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - state += kNumStates; - prob = probs + LenCoder; - } - else - { - UPDATE_1(prob); - if (checkDicSize == 0 && processedPos == 0) - return SZ_ERROR_DATA; - prob = probs + IsRepG0 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; - IF_BIT_0(prob) - { - UPDATE_0(prob); - dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - dicPos++; - processedPos++; - state = state < kNumLitStates ? 9 : 11; - continue; - } - UPDATE_1(prob); - } - else - { - UInt32 distance; - UPDATE_1(prob); - prob = probs + IsRepG1 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - distance = rep1; - } - else - { - UPDATE_1(prob); - prob = probs + IsRepG2 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - distance = rep2; - } - else - { - UPDATE_1(prob); - distance = rep3; - rep3 = rep2; - } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; - } - state = state < kNumLitStates ? 8 : 11; - prob = probs + RepLenCoder; - } - { - unsigned limit, offset; - CLzmaProb *probLen = prob + LenChoice; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); - probLen = prob + LenLow + (posState << kLenNumLowBits); - offset = 0; - limit = (1 << kLenNumLowBits); - } - else - { - UPDATE_1(probLen); - probLen = prob + LenChoice2; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); - probLen = prob + LenMid + (posState << kLenNumMidBits); - offset = kLenNumLowSymbols; - limit = (1 << kLenNumMidBits); - } - else - { - UPDATE_1(probLen); - probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; - limit = (1 << kLenNumHighBits); - } - } - TREE_DECODE(probLen, limit, len); - len += offset; - } - - if (state >= kNumStates) - { - UInt32 distance; - prob = probs + PosSlot + - ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_6_DECODE(prob, distance); - if (distance >= kStartPosModelIndex) - { - unsigned posSlot = (unsigned)distance; - int numDirectBits = (int)(((distance >> 1) - 1)); - distance = (2 | (distance & 1)); - if (posSlot < kEndPosModelIndex) - { - distance <<= numDirectBits; - prob = probs + SpecPos + distance - posSlot - 1; - { - UInt32 mask = 1; - unsigned i = 1; - do - { - GET_BIT2(prob + i, i, ; , distance |= mask); - mask <<= 1; - } - while (--numDirectBits != 0); - } - } - else - { - numDirectBits -= kNumAlignBits; - do - { - NORMALIZE - range >>= 1; - - { - UInt32 t; - code -= range; - t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ - distance = (distance << 1) + (t + 1); - code += range & t; - } - /* - distance <<= 1; - if (code >= range) - { - code -= range; - distance |= 1; - } - */ - } - while (--numDirectBits != 0); - prob = probs + Align; - distance <<= kNumAlignBits; - { - unsigned i = 1; - GET_BIT2(prob + i, i, ; , distance |= 1); - GET_BIT2(prob + i, i, ; , distance |= 2); - GET_BIT2(prob + i, i, ; , distance |= 4); - GET_BIT2(prob + i, i, ; , distance |= 8); - } - if (distance == (UInt32)0xFFFFFFFF) - { - len += kMatchSpecLenStart; - state -= kNumStates; - break; - } - } - } - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = distance + 1; - if (checkDicSize == 0) - { - if (distance >= processedPos) - return SZ_ERROR_DATA; - } - else if (distance >= checkDicSize) - return SZ_ERROR_DATA; - state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; - } - - len += kMatchMinLen; - - if (limit == dicPos) - return SZ_ERROR_DATA; - { - SizeT rem = limit - dicPos; - unsigned curLen = ((rem < len) ? (unsigned)rem : len); - SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); - - processedPos += curLen; - - len -= curLen; - if (pos + curLen <= dicBufSize) - { - Byte *dest = dic + dicPos; - ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; - const Byte *lim = dest + curLen; - dicPos += curLen; - do - *(dest) = (Byte)*(dest + src); - while (++dest != lim); - } - else - { - do - { - dic[dicPos++] = dic[pos]; - if (++pos == dicBufSize) - pos = 0; - } - while (--curLen != 0); - } - } - } - } - while (dicPos < limit && buf < bufLimit); - NORMALIZE; - p->buf = buf; - p->range = range; - p->code = code; - p->remainLen = len; - p->dicPos = dicPos; - p->processedPos = processedPos; - p->reps[0] = rep0; - p->reps[1] = rep1; - p->reps[2] = rep2; - p->reps[3] = rep3; - p->state = state; - - return SZ_OK; -} - -static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) -{ - if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) - { - Byte *dic = p->dic; - SizeT dicPos = p->dicPos; - SizeT dicBufSize = p->dicBufSize; - unsigned len = p->remainLen; - UInt32 rep0 = p->reps[0]; - if (limit - dicPos < len) - len = (unsigned)(limit - dicPos); - - if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) - p->checkDicSize = p->prop.dicSize; - - p->processedPos += len; - p->remainLen -= len; - while (len-- != 0) - { - dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - dicPos++; - } - p->dicPos = dicPos; - } -} - -static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) -{ - do - { - SizeT limit2 = limit; - if (p->checkDicSize == 0) - { - UInt32 rem = p->prop.dicSize - p->processedPos; - if (limit - p->dicPos > rem) - limit2 = p->dicPos + rem; - } - RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); - if (p->processedPos >= p->prop.dicSize) - p->checkDicSize = p->prop.dicSize; - LzmaDec_WriteRem(p, limit); - } - while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - - if (p->remainLen > kMatchSpecLenStart) - { - p->remainLen = kMatchSpecLenStart; - } - return 0; -} - -typedef enum -{ - DUMMY_ERROR, /* unexpected end of input stream */ - DUMMY_LIT, - DUMMY_MATCH, - DUMMY_REP -} ELzmaDummy; - -static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) -{ - UInt32 range = p->range; - UInt32 code = p->code; - const Byte *bufLimit = buf + inSize; - CLzmaProb *probs = p->probs; - unsigned state = p->state; - ELzmaDummy res; - - { - CLzmaProb *prob; - UInt32 bound; - unsigned ttt; - unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); - - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK - - /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ - - prob = probs + Literal; - if (p->checkDicSize != 0 || p->processedPos != 0) - prob += (LZMA_LIT_SIZE * - ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + - (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); - - if (state < kNumLitStates) - { - unsigned symbol = 1; - do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); - } - else - { - unsigned matchByte = p->dic[p->dicPos - p->reps[0] + - ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)]; - unsigned offs = 0x100; - unsigned symbol = 1; - do - { - unsigned bit; - CLzmaProb *probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) - } - while (symbol < 0x100); - } - res = DUMMY_LIT; - } - else - { - unsigned len; - UPDATE_1_CHECK; - - prob = probs + IsRep + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - state = 0; - prob = probs + LenCoder; - res = DUMMY_MATCH; - } - else - { - UPDATE_1_CHECK; - res = DUMMY_REP; - prob = probs + IsRepG0 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - NORMALIZE_CHECK; - return DUMMY_REP; - } - else - { - UPDATE_1_CHECK; - } - } - else - { - UPDATE_1_CHECK; - prob = probs + IsRepG1 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - } - else - { - UPDATE_1_CHECK; - prob = probs + IsRepG2 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - } - else - { - UPDATE_1_CHECK; - } - } - } - state = kNumStates; - prob = probs + RepLenCoder; - } - { - unsigned limit, offset; - CLzmaProb *probLen = prob + LenChoice; - IF_BIT_0_CHECK(probLen) - { - UPDATE_0_CHECK; - probLen = prob + LenLow + (posState << kLenNumLowBits); - offset = 0; - limit = 1 << kLenNumLowBits; - } - else - { - UPDATE_1_CHECK; - probLen = prob + LenChoice2; - IF_BIT_0_CHECK(probLen) - { - UPDATE_0_CHECK; - probLen = prob + LenMid + (posState << kLenNumMidBits); - offset = kLenNumLowSymbols; - limit = 1 << kLenNumMidBits; - } - else - { - UPDATE_1_CHECK; - probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; - limit = 1 << kLenNumHighBits; - } - } - TREE_DECODE_CHECK(probLen, limit, len); - len += offset; - } - - if (state < 4) - { - unsigned posSlot; - prob = probs + PosSlot + - ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << - kNumPosSlotBits); - TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); - if (posSlot >= kStartPosModelIndex) - { - int numDirectBits = ((posSlot >> 1) - 1); - - /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ - - if (posSlot < kEndPosModelIndex) - { - prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; - } - else - { - numDirectBits -= kNumAlignBits; - do - { - NORMALIZE_CHECK - range >>= 1; - code -= range & (((code - range) >> 31) - 1); - /* if (code >= range) code -= range; */ - } - while (--numDirectBits != 0); - prob = probs + Align; - numDirectBits = kNumAlignBits; - } - { - unsigned i = 1; - do - { - GET_BIT_CHECK(prob + i, i); - } - while (--numDirectBits != 0); - } - } - } - } - } - NORMALIZE_CHECK; - return res; -} - - -static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data) -{ - p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]); - p->range = 0xFFFFFFFF; - p->needFlush = 0; -} - -void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) -{ - p->needFlush = 1; - p->remainLen = 0; - p->tempBufSize = 0; - - if (initDic) - { - p->processedPos = 0; - p->checkDicSize = 0; - p->needInitState = 1; - } - if (initState) - p->needInitState = 1; -} - -void LzmaDec_Init(CLzmaDec *p) -{ - p->dicPos = 0; - LzmaDec_InitDicAndState(p, True, True); -} - -static void LzmaDec_InitStateReal(CLzmaDec *p) -{ - UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); - UInt32 i; - CLzmaProb *probs = p->probs; - for (i = 0; i < numProbs; i++) - probs[i] = kBitModelTotal >> 1; - p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; - p->state = 0; - p->needInitState = 0; -} - -SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, - ELzmaFinishMode finishMode, ELzmaStatus *status) -{ - SizeT inSize = *srcLen; - (*srcLen) = 0; - LzmaDec_WriteRem(p, dicLimit); - - *status = LZMA_STATUS_NOT_SPECIFIED; - - while (p->remainLen != kMatchSpecLenStart) - { - int checkEndMarkNow; - - if (p->needFlush != 0) - { - for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) - p->tempBuf[p->tempBufSize++] = *src++; - if (p->tempBufSize < RC_INIT_SIZE) - { - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (p->tempBuf[0] != 0) - return SZ_ERROR_DATA; - - LzmaDec_InitRc(p, p->tempBuf); - p->tempBufSize = 0; - } - - checkEndMarkNow = 0; - if (p->dicPos >= dicLimit) - { - if (p->remainLen == 0 && p->code == 0) - { - *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; - return SZ_OK; - } - if (finishMode == LZMA_FINISH_ANY) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_OK; - } - if (p->remainLen != 0) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - checkEndMarkNow = 1; - } - - if (p->needInitState) - LzmaDec_InitStateReal(p); - - if (p->tempBufSize == 0) - { - SizeT processed; - const Byte *bufLimit; - if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) - { - int dummyRes = LzmaDec_TryDummy(p, src, inSize); - if (dummyRes == DUMMY_ERROR) - { - memcpy(p->tempBuf, src, inSize); - p->tempBufSize = (unsigned)inSize; - (*srcLen) += inSize; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - bufLimit = src; - } - else - bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; - p->buf = src; - if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) - return SZ_ERROR_DATA; - processed = (SizeT)(p->buf - src); - (*srcLen) += processed; - src += processed; - inSize -= processed; - } - else - { - unsigned rem = p->tempBufSize, lookAhead = 0; - while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) - p->tempBuf[rem++] = src[lookAhead++]; - p->tempBufSize = rem; - if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) - { - int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); - if (dummyRes == DUMMY_ERROR) - { - (*srcLen) += lookAhead; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - } - p->buf = p->tempBuf; - if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) - return SZ_ERROR_DATA; - lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf)); - (*srcLen) += lookAhead; - src += lookAhead; - inSize -= lookAhead; - p->tempBufSize = 0; - } - } - if (p->code == 0) - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; -} - -SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) -{ - SizeT outSize = *destLen; - SizeT inSize = *srcLen; - *srcLen = *destLen = 0; - for (;;) - { - SizeT inSizeCur = inSize, outSizeCur, dicPos; - ELzmaFinishMode curFinishMode; - SRes res; - if (p->dicPos == p->dicBufSize) - p->dicPos = 0; - dicPos = p->dicPos; - if (outSize > p->dicBufSize - dicPos) - { - outSizeCur = p->dicBufSize; - curFinishMode = LZMA_FINISH_ANY; - } - else - { - outSizeCur = dicPos + outSize; - curFinishMode = finishMode; - } - - res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); - src += inSizeCur; - inSize -= inSizeCur; - *srcLen += inSizeCur; - outSizeCur = p->dicPos - dicPos; - memcpy(dest, p->dic + dicPos, outSizeCur); - dest += outSizeCur; - outSize -= outSizeCur; - *destLen += outSizeCur; - if (res != 0) - return res; - if (outSizeCur == 0 || outSize == 0) - return SZ_OK; - } -} - -void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) -{ - alloc->Free(alloc, p->probs); - p->probs = 0; -} - -static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc) -{ - alloc->Free(alloc, p->dic); - p->dic = 0; -} - -void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc) -{ - LzmaDec_FreeProbs(p, alloc); - LzmaDec_FreeDict(p, alloc); -} - -SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) -{ - UInt32 dicSize; - Byte d; - - if (size < LZMA_PROPS_SIZE) - return SZ_ERROR_UNSUPPORTED; - else - dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); - - if (dicSize < LZMA_DIC_MIN) - dicSize = LZMA_DIC_MIN; - p->dicSize = dicSize; - - d = data[0]; - if (d >= (9 * 5 * 5)) - return SZ_ERROR_UNSUPPORTED; - - p->lc = d % 9; - d /= 9; - p->pb = d / 5; - p->lp = d % 5; - - return SZ_OK; -} - -static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc) -{ - UInt32 numProbs = LzmaProps_GetNumProbs(propNew); - if (p->probs == 0 || numProbs != p->numProbs) - { - LzmaDec_FreeProbs(p, alloc); - p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); - p->numProbs = numProbs; - if (p->probs == 0) - return SZ_ERROR_MEM; - } - return SZ_OK; -} - -SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) -{ - CLzmaProps propNew; - SizeT dicBufSize; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - dicBufSize = propNew.dicSize; - if (p->dic == 0 || dicBufSize != p->dicBufSize) - { - LzmaDec_FreeDict(p, alloc); - p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize); - if (p->dic == 0) - { - LzmaDec_FreeProbs(p, alloc); - return SZ_ERROR_MEM; - } - } - p->dicBufSize = dicBufSize; - p->prop = propNew; - return SZ_OK; -} +/* LzmaDec.c -- LZMA Decoder +2009-09-20 : Igor Pavlov : Public domain */ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include + +#include "clzip.h" +#include "LzmaDec.h" + + +CRC32 crc32; + + +/* Returns the number of bytes really read. + If (returned value < size) and (errno == 0), means EOF was reached. +*/ +int readblock( const int fd, uint8_t * const buf, const int size ) + { + int rest = size; + errno = 0; + while( rest > 0 ) + { + const int n = read( fd, buf + size - rest, rest ); + if( n > 0 ) rest -= n; + else if( n == 0 ) break; /* EOF */ + else if( errno != EINTR && errno != EAGAIN ) break; + errno = 0; + } + return size - rest; + } + + +/* Returns the number of bytes really written. + If (returned value < size), it is always an error. +*/ +int writeblock( const int fd, const uint8_t * const buf, const int size ) + { + int rest = size; + errno = 0; + while( rest > 0 ) + { + const int n = write( fd, buf + size - rest, rest ); + if( n > 0 ) rest -= n; + else if( n < 0 && errno != EINTR && errno != EAGAIN ) break; + errno = 0; + } + return size - rest; + } + + +#define kNumTopBits 24 +#define kTopValue ((uint32_t)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +#define RC_INIT_SIZE 5 + +#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) +#define UPDATE_0(p) range = bound; *(p) = (int)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); +#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (int)(ttt - (ttt >> kNumMoveBits)); +#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ + { UPDATE_0(p); i = (i + i); A0; } else \ + { UPDATE_1(p); i = (i + i) + 1; A1; } +#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;) + +#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); } +#define TREE_DECODE(probs, limit, i) \ + { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } + +/* #define _LZMA_SIZE_OPT */ + +#ifdef _LZMA_SIZE_OPT +#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) +#else +#define TREE_6_DECODE(probs, i) \ + { i = 1; \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + i -= 0x40; } +#endif + +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK range -= bound; code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ + { UPDATE_0_CHECK; i = (i + i); A0; } else \ + { UPDATE_1_CHECK; i = (i + i) + 1; A1; } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } + + +#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumMidBits 3 +#define kLenNumMidSymbols (1 << kLenNumMidBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LenChoice 0 +#define LenChoice2 (LenChoice + 1) +#define LenLow (LenChoice2 + 1) +#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) +#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + + +#define kNumStates 12 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) + +#define IsMatch 0 +#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 + kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) +#define IsRep0Long (IsRepG2 + kNumStates) +#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) +#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) +#define LenCoder (Align + kAlignTableSize) +#define RepLenCoder (LenCoder + kNumLenProbs) +#define Literal (RepLenCoder + kNumLenProbs) + +#define LZMA_BASE_SIZE 1846 +#define LZMA_LIT_SIZE 768 + +#define LzmaProps_GetNumProbs(p) ((uint32_t)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + +#if Literal != LZMA_BASE_SIZE +StopCompilingDueBUG +#endif + + +/* First LZMA-symbol is always decoded. +And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization +Out: + Result: + true - OK + false - Error + p->remainLen: + < kMatchSpecLenStart : normal remain + = kMatchSpecLenStart : finished + = kMatchSpecLenStart + 1 : Flush marker + = kMatchSpecLenStart + 2 : State Init Marker +*/ + +static bool LzmaDec_DecodeReal(CLzmaDec *p, uint32_t limit, const uint8_t *bufLimit) +{ + int *probs = p->probs; + + State state = p->state; + uint32_t rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->pb)) - 1; + unsigned lpMask = ((unsigned)1 << (p->lp)) - 1; + const unsigned lc = p->lc; + + uint8_t *dic = p->dic; + const uint32_t dicBufSize = p->dicBufSize; + uint32_t dicPos = p->dicPos; + + uint32_t processedPos = p->processedPos; + uint32_t checkDicSize = p->checkDicSize; + unsigned len = 0; + + const uint8_t *buf = p->buf; + uint32_t range = p->range; + uint32_t code = p->code; + + do + { + int *prob; + uint32_t bound; + unsigned ttt; + unsigned posState = processedPos & pbMask; + + prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + IF_BIT_0(prob) + { + unsigned symbol; + UPDATE_0(prob); + prob = probs + Literal; + if (checkDicSize != 0 || processedPos != 0) + prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + + (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); + + if (state < kNumLitStates) + { + state -= (state < 4) ? state : 3; + symbol = 1; + do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + unsigned offs = 0x100; + state -= (state < 10) ? 3 : 6; + symbol = 1; + do + { + unsigned bit; + int *probLit; + matchByte <<= 1; + bit = (matchByte & offs); + probLit = prob + offs + bit + symbol; + GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) + } + while (symbol < 0x100); + } + dic[dicPos++] = (uint8_t)symbol; + processedPos++; + continue; + } + else + { + UPDATE_1(prob); + prob = probs + IsRep + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + state += kNumStates; + prob = probs + LenCoder; + } + else + { + UPDATE_1(prob); + if (checkDicSize == 0 && processedPos == 0) + return false; + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + IF_BIT_0(prob) + { + UPDATE_0(prob); + dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 9 : 11; + continue; + } + UPDATE_1(prob); + } + else + { + uint32_t distance; + UPDATE_1(prob); + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep1; + } + else + { + UPDATE_1(prob); + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep2; + } + else + { + UPDATE_1(prob); + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 8 : 11; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + int *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + (posState << kLenNumLowBits); + offset = 0; + limit = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenMid + (posState << kLenNumMidBits); + offset = kLenNumLowSymbols; + limit = (1 << kLenNumMidBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + offset = kLenNumLowSymbols + kLenNumMidSymbols; + limit = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, limit, len); + len += offset; + } + + if (state >= kNumStates) + { + uint32_t distance; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_6_DECODE(prob, distance); + if (distance >= kStartPosModelIndex) + { + unsigned posSlot = (unsigned)distance; + int numDirectBits = (int)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) + { + distance <<= numDirectBits; + prob = probs + SpecPos + distance - posSlot - 1; + { + uint32_t mask = 1; + unsigned i = 1; + do + { + GET_BIT2(prob + i, i, ; , distance |= mask); + mask <<= 1; + } + while (--numDirectBits != 0); + } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE + range >>= 1; + + { + uint32_t t; + code -= range; + t = (0 - ((uint32_t)code >> 31)); /* (uint32_t)((int)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } + while (--numDirectBits != 0); + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; + GET_BIT2(prob + i, i, ; , distance |= 1); + GET_BIT2(prob + i, i, ; , distance |= 2); + GET_BIT2(prob + i, i, ; , distance |= 4); + GET_BIT2(prob + i, i, ; , distance |= 8); + } + if (distance == (uint32_t)0xFFFFFFFF) + { + len += kMatchSpecLenStart; + state -= kNumStates; + break; + } + } + } + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; + if (checkDicSize == 0) + { + if (distance >= processedPos) + return false; + } + else if (distance >= checkDicSize) + return false; + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + } + + len += kMatchMinLen; + + if (limit == dicPos) + return false; + { + uint32_t rem = limit - dicPos; + unsigned curLen = ((rem < len) ? (unsigned)rem : len); + uint32_t pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); + + processedPos += curLen; + + len -= curLen; + if (pos + curLen <= dicBufSize) + { + uint8_t *dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const uint8_t *lim = dest + curLen; + dicPos += curLen; + do + *(dest) = (uint8_t)*(dest + src); + while (++dest != lim); + } + else + { + do + { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } + while (--curLen != 0); + } + } + } + } + while (dicPos < limit && buf < bufLimit); + NORMALIZE; + p->buf = buf; + p->range = range; + p->code = code; + p->remainLen = len; + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + p->reps[2] = rep2; + p->reps[3] = rep3; + p->state = state; + + return true; +} + +static void LzmaDec_WriteRem(CLzmaDec *p, uint32_t limit) +{ + if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) + { + uint8_t *dic = p->dic; + uint32_t dicPos = p->dicPos; + const uint32_t dicBufSize = p->dicBufSize; + unsigned len = p->remainLen; + uint32_t rep0 = p->reps[0]; + if (limit - dicPos < len) + len = (unsigned)(limit - dicPos); + + if (p->checkDicSize == 0 && dicBufSize - p->processedPos <= len) + p->checkDicSize = dicBufSize; + + p->processedPos += len; + p->remainLen -= len; + while (len-- != 0) + { + dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + dicPos++; + } + p->dicPos = dicPos; + } +} + +static int LzmaDec_DecodeReal2(CLzmaDec *p, uint32_t limit, const uint8_t *bufLimit) +{ + const uint32_t dicBufSize = p->dicBufSize; + do + { + uint32_t limit2 = limit; + if (p->checkDicSize == 0) + { + uint32_t rem = dicBufSize - p->processedPos; + if (limit - p->dicPos > rem) + limit2 = p->dicPos + rem; + } + if( !LzmaDec_DecodeReal(p, limit2, bufLimit) ) return false; + if (p->processedPos >= dicBufSize) + p->checkDicSize = dicBufSize; + LzmaDec_WriteRem(p, limit); + } + while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); + + if (p->remainLen > kMatchSpecLenStart) + { + p->remainLen = kMatchSpecLenStart; + } + return true; +} + +typedef enum +{ + DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const uint8_t *buf, uint32_t inSize) +{ + uint32_t range = p->range; + uint32_t code = p->code; + const uint8_t *bufLimit = buf + inSize; + int *probs = p->probs; + State state = p->state; + ELzmaDummy res; + + { + int *prob; + uint32_t bound; + unsigned ttt; + unsigned posState = (p->processedPos) & ((1 << p->pb) - 1); + + prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + + /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += (LZMA_LIT_SIZE * + ((((p->processedPos) & ((1 << (p->lp)) - 1)) << p->lc) + + (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->lc)))); + + if (state < kNumLitStates) + { + unsigned symbol = 1; + do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do + { + unsigned bit; + int *probLit; + matchByte <<= 1; + bit = (matchByte & offs); + probLit = prob + offs + bit + symbol; + GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) + } + while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else + { + unsigned len; + UPDATE_1_CHECK; + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else + { + UPDATE_1_CHECK; + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + NORMALIZE_CHECK; + return DUMMY_REP; + } + else + { + UPDATE_1_CHECK; + } + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + int *probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenLow + (posState << kLenNumLowBits); + offset = 0; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenMid + (posState << kLenNumMidBits); + offset = kLenNumLowSymbols; + limit = 1 << kLenNumMidBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenHigh; + offset = kLenNumLowSymbols + kLenNumMidSymbols; + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len); + len += offset; + } + + if (state < 4) + { + unsigned posSlot; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << + kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + if (posSlot >= kStartPosModelIndex) + { + int numDirectBits = ((posSlot >> 1) - 1); + + /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ + + if (posSlot < kEndPosModelIndex) + { + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } + while (--numDirectBits != 0); + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; + do + { + GET_BIT_CHECK(prob + i, i); + } + while (--numDirectBits != 0); + } + } + } + } + } + NORMALIZE_CHECK; + return res; +} + + +static void LzmaDec_InitRc(CLzmaDec *p, const uint8_t *data) +{ + p->code = ((uint32_t)data[1] << 24) | ((uint32_t)data[2] << 16) | ((uint32_t)data[3] << 8) | ((uint32_t)data[4]); + p->range = 0xFFFFFFFF; + p->needFlush = false; +} + + +static bool LzmaDec_DecodeToDic(CLzmaDec *p, uint32_t dicLimit, + const uint8_t *src, uint32_t *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + uint32_t inSize = *srcLen; + (*srcLen) = 0; + LzmaDec_WriteRem(p, dicLimit); + + *status = LZMA_STATUS_NOT_SPECIFIED; + + while (p->remainLen != kMatchSpecLenStart) + { + int checkEndMarkNow; + + if( p->needFlush ) + { + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return true; + } + if (p->tempBuf[0] != 0) + return false; + + LzmaDec_InitRc(p, p->tempBuf); + p->tempBufSize = 0; + } + + checkEndMarkNow = 0; + if (p->dicPos >= dicLimit) + { + if (p->remainLen == 0 && p->code == 0) + { + *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; + return true; + } + if (finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return true; + } + if (p->remainLen != 0) + { + *status = LZMA_STATUS_NOT_FINISHED; + return false; + } + checkEndMarkNow = 1; + } + + if (p->tempBufSize == 0) + { + uint32_t processed; + const uint8_t *bufLimit; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = LzmaDec_TryDummy(p, src, inSize); + if (dummyRes == DUMMY_ERROR) + { + memcpy(p->tempBuf, src, inSize); + p->tempBufSize = (unsigned)inSize; + (*srcLen) += inSize; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return true; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return false; + } + bufLimit = src; + } + else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; + if( !LzmaDec_DecodeReal2(p, dicLimit, bufLimit) ) + return false; + processed = (uint32_t)(p->buf - src); + (*srcLen) += processed; + src += processed; + inSize -= processed; + } + else + { + unsigned rem = p->tempBufSize, lookAhead = 0; + while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) + p->tempBuf[rem++] = src[lookAhead++]; + p->tempBufSize = rem; + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); + if (dummyRes == DUMMY_ERROR) + { + (*srcLen) += lookAhead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return true; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return false; + } + } + p->buf = p->tempBuf; + if( !LzmaDec_DecodeReal2(p, dicLimit, p->buf) ) + return false; + lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf)); + (*srcLen) += lookAhead; + src += lookAhead; + inSize -= lookAhead; + p->tempBufSize = 0; + } + } + if (p->code == 0) + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return (p->code == 0); +} + +bool LzmaDec_DecodeToBuf( CLzmaDec *p, uint8_t *dest, uint32_t *destLen, + const uint8_t *src, uint32_t *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status ) +{ + uint32_t outSize = *destLen; + uint32_t inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + uint32_t inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + bool res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) + { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if( !res ) + return false; + if (outSizeCur == 0 || outSize == 0) + return true; + } +} + + +void LzmaDec_Free(CLzmaDec *p) +{ + free( p->dic ); + free( p->probs ); +} + + +bool LzmaDec_Init(CLzmaDec *p, const uint8_t *raw_props) +{ + uint32_t i; + uint8_t d = raw_props[0]; + + p->lc = d % 9; + d /= 9; + p->pb = d / 5; + p->lp = d % 5; + + p->dicBufSize = raw_props[1] | ((uint32_t)raw_props[2] << 8) | + ((uint32_t)raw_props[3] << 16) | ((uint32_t)raw_props[4] << 24); + if (p->dicBufSize < min_dictionary_size) p->dicBufSize = min_dictionary_size; + + p->numProbs = LzmaProps_GetNumProbs(p); + p->probs = (int *)malloc(p->numProbs * sizeof(int)); + if( !p->probs ) return false; + p->dic = (uint8_t *)malloc(p->dicBufSize); + if (p->dic == 0) + { + free( p->probs ); + return false; + } + p->dicPos = 0; + p->needFlush = true; + p->remainLen = 0; + p->tempBufSize = 0; + p->processedPos = 0; + p->checkDicSize = 0; + for( i = 0; i < p->numProbs; ++i ) p->probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + return true; +} diff --git a/LzmaDec.h b/LzmaDec.h index 2a7741f..59f7400 100644 --- a/LzmaDec.h +++ b/LzmaDec.h @@ -1,191 +1,90 @@ -/* LzmaDec.h -- LZMA Decoder -2009-02-07 : Igor Pavlov : Public domain */ - -#ifndef __LZMA_DEC_H -#define __LZMA_DEC_H - -#include "Types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* #define _LZMA_PROB32 */ -/* _LZMA_PROB32 can increase the speed on some CPUs, - but memory usage for CLzmaDec::probs will be doubled in that case */ - -#ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 -#else -#define CLzmaProb UInt16 -#endif - - -/* ---------- LZMA Properties ---------- */ - -#define LZMA_PROPS_SIZE 5 - -typedef struct _CLzmaProps -{ - unsigned lc, lp, pb; - UInt32 dicSize; -} CLzmaProps; - -/* LzmaProps_Decode - decodes properties -Returns: - SZ_OK - SZ_ERROR_UNSUPPORTED - Unsupported properties -*/ - -SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); - - -/* ---------- LZMA Decoder state ---------- */ - -/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. - Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ - -#define LZMA_REQUIRED_INPUT_MAX 20 - -typedef struct -{ - CLzmaProps prop; - CLzmaProb *probs; - Byte *dic; - const Byte *buf; - UInt32 range, code; - SizeT dicPos; - SizeT dicBufSize; - UInt32 processedPos; - UInt32 checkDicSize; - unsigned state; - UInt32 reps[4]; - unsigned remainLen; - int needFlush; - int needInitState; - UInt32 numProbs; - unsigned tempBufSize; - Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; -} CLzmaDec; - -#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } - -void LzmaDec_Init(CLzmaDec *p); - -/* There are two types of LZMA streams: - 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. - 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ - -typedef enum -{ - LZMA_FINISH_ANY, /* finish at any point */ - LZMA_FINISH_END /* block must be finished at the end */ -} ELzmaFinishMode; - -/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! - - You must use LZMA_FINISH_END, when you know that current output buffer - covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. - - If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, - and output value of destLen will be less than output buffer size limit. - You can check status result also. - - You can use multiple checks to test data integrity after full decompression: - 1) Check Result and "status" variable. - 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. - 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. - You must use correct finish mode in that case. */ - -typedef enum -{ - LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ - LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ - LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ - LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ -} ELzmaStatus; - -/* ELzmaStatus is used only as output value for function call */ - - -/* ---------- Interfaces ---------- */ - -/* There are 3 levels of interfaces: - 1) Dictionary Interface - 2) Buffer Interface - 3) One Call Interface - You can select any of these interfaces, but don't mix functions from different - groups for same object. */ - - -SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc); -void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); - -/* ---------- Dictionary Interface ---------- */ - -/* You can use it, if you want to eliminate the overhead for data copying from - dictionary to some other external buffer. - You must work with CLzmaDec variables directly in this interface. - - STEPS: - LzmaDec_Constr() - LzmaDec_Allocate() - for (each new stream) - { - LzmaDec_Init() - while (it needs more decompression) - { - LzmaDec_DecodeToDic() - use data from CLzmaDec::dic and update CLzmaDec::dicPos - } - } - LzmaDec_Free() -*/ - -/* LzmaDec_DecodeToDic - - The decoding to internal dictionary buffer (CLzmaDec::dic). - You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! - -finishMode: - It has meaning only if the decoding reaches output limit (dicLimit). - LZMA_FINISH_ANY - Decode just dicLimit bytes. - LZMA_FINISH_END - Stream must be finished after dicLimit. - -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - LZMA_STATUS_NEEDS_MORE_INPUT - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK - SZ_ERROR_DATA - Data error -*/ - -SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, - const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); - - -/* ---------- Buffer Interface ---------- */ - -/* It's zlib-like interface. - See LzmaDec_DecodeToDic description for information about STEPS and return results, - but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need - to work with CLzmaDec variables manually. - -finishMode: - It has meaning only if the decoding reaches output limit (*destLen). - LZMA_FINISH_ANY - Decode just destLen bytes. - LZMA_FINISH_END - Stream must be finished after (*destLen). -*/ - -SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, - const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); - - -#ifdef __cplusplus -} -#endif - -#endif +/* LzmaDec.h -- LZMA Decoder +2009-02-07 : Igor Pavlov : Public domain */ + +/* ---------- LZMA Properties ---------- */ + +#define LZMA_PROPS_SIZE 5 + + +/* ---------- LZMA Decoder state ---------- */ + +/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. + Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ + +#define LZMA_REQUIRED_INPUT_MAX 20 + +typedef struct +{ + int *probs; + uint8_t *dic; + const uint8_t *buf; + uint32_t range, code; + uint32_t dicPos; + uint32_t dicBufSize; + uint32_t processedPos; + uint32_t checkDicSize; + unsigned lc, lp, pb; + State state; + uint32_t reps[4]; + unsigned remainLen; + uint32_t numProbs; + unsigned tempBufSize; + bool needFlush; + uint8_t tempBuf[LZMA_REQUIRED_INPUT_MAX]; +} CLzmaDec; + + +/* There are two types of LZMA streams: + 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. + 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + +typedef enum +{ + LZMA_FINISH_ANY, /* finish at any point */ + LZMA_FINISH_END /* block must be finished at the end */ +} ELzmaFinishMode; + +/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! + + You must use LZMA_FINISH_END, when you know that current output buffer + covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. + + If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, + and output value of destLen will be less than output buffer size limit. + You can check status result also. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + +typedef enum +{ + LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ + LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ + LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ + LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ +} ELzmaStatus; + +/* ELzmaStatus is used only as output value for function call */ + + +bool LzmaDec_Init(CLzmaDec *p, const uint8_t *raw_props); +void LzmaDec_Free(CLzmaDec *p); + + +/* ---------- Buffer Interface ---------- */ + +/* It's zlib-like interface. + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). +*/ + +bool LzmaDec_DecodeToBuf( CLzmaDec *p, uint8_t *dest, uint32_t *destLen, + const uint8_t *src, uint32_t *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status ); diff --git a/LzmaEnc.c b/LzmaEnc.c index fba2349..e5125d9 100644 --- a/LzmaEnc.c +++ b/LzmaEnc.c @@ -1,2074 +1,1706 @@ -/* LzmaEnc.c -- LZMA Encoder -2009-11-24 : Igor Pavlov : Public domain */ - -#define _FILE_OFFSET_BITS 64 - -#include -#include -#include - -/* #define SHOW_STAT */ -/* #define SHOW_STAT2 */ - -#include "pdlzip.h" -#include "LzmaEnc.h" -#include "LzFind.h" - -#ifdef SHOW_STAT -static int ttt = 0; -#endif - -#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) - -#define kBlockSize (9 << 10) -#define kUnpackBlockSize (1 << 18) -#define kMatchArraySize (1 << 21) -#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) - -#define kNumMaxDirectBits (31) - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 -#define kProbInitValue (kBitModelTotal >> 1) - -#define kNumMoveReducingBits 4 -#define kNumBitPriceShiftBits 4 -#define kBitPrice (1 << kNumBitPriceShiftBits) - -void LzmaEncProps_Init(CLzmaEncProps *p) -{ - p->level = 5; - p->dictSize = p->mc = 0; - p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; -} - -void LzmaEncProps_Normalize(CLzmaEncProps *p) -{ - int level = p->level; - if (level < 0) level = 5; - p->level = level; - if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26))); - if (p->lc < 0) p->lc = 3; - if (p->lp < 0) p->lp = 0; - if (p->pb < 0) p->pb = 2; - if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); - if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); - if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); - if (p->numHashBytes < 0) p->numHashBytes = 4; - if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); - if (p->numThreads < 0) - p->numThreads = 1; -} - -UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) -{ - CLzmaEncProps props = *props2; - LzmaEncProps_Normalize(&props); - return props.dictSize; -} - -/* #define LZMA_LOG_BSR */ -/* Define it for Intel's CPU */ - - -#ifdef LZMA_LOG_BSR - -#define kDicLogSizeMaxCompress 30 - -#define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); } - -UInt32 GetPosSlot1(UInt32 pos) -{ - UInt32 res; - BSR2_RET(pos, res); - return res; -} -#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } - -#else - -#define kNumLogBits (9 + (int)sizeof(size_t) / 2) -#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) - -void LzmaEnc_FastPosInit(Byte *g_FastPos) -{ - int c = 2, slotFast; - g_FastPos[0] = 0; - g_FastPos[1] = 1; - - for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++) - { - UInt32 k = (1 << ((slotFast >> 1) - 1)); - UInt32 j; - for (j = 0; j < k; j++, c++) - g_FastPos[c] = (Byte)slotFast; - } -} - -#define BSR2_RET(pos, res) { UInt32 i = 6 + ((kNumLogBits - 1) & \ - (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ - res = p->g_FastPos[pos >> i] + (i * 2); } -/* -#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \ - p->g_FastPos[pos >> 6] + 12 : \ - p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } -*/ - -#define GetPosSlot1(pos) p->g_FastPos[pos] -#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); } - -#endif - - -#define LZMA_NUM_REPS 4 - -typedef unsigned CState; - -typedef struct -{ - UInt32 price; - - CState state; - int prev1IsChar; - int prev2; - - UInt32 posPrev2; - UInt32 backPrev2; - - UInt32 posPrev; - UInt32 backPrev; - UInt32 backs[LZMA_NUM_REPS]; -} COptimal; - -#define kNumOpts (1 << 12) - -#define kNumLenToPosStates 4 -#define kNumPosSlotBits 6 -#define kDicLogSizeMin 0 -#define kDicLogSizeMax 32 -#define kDistTableSizeMax (kDicLogSizeMax * 2) - - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) -#define kAlignMask (kAlignTableSize - 1) - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex) - -#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) - -#ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 -#else -#define CLzmaProb UInt16 -#endif - -#define LZMA_PB_MAX 4 -#define LZMA_LC_MAX 8 -#define LZMA_LP_MAX 4 - -#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) - - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) -#define kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) - -#define LZMA_MATCH_LEN_MIN 2 -#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) - -#define kNumStates 12 - -typedef struct -{ - CLzmaProb choice; - CLzmaProb choice2; - CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits]; - CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits]; - CLzmaProb high[kLenNumHighSymbols]; -} CLenEnc; - -typedef struct -{ - CLenEnc p; - UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; - UInt32 tableSize; - UInt32 counters[LZMA_NUM_PB_STATES_MAX]; -} CLenPriceEnc; - -typedef struct -{ - UInt32 range; - Byte cache; - UInt64 low; - UInt64 cacheSize; - Byte *buf; - Byte *bufLim; - Byte *bufBase; - ISeqOutStream *outStream; - UInt64 processed; - SRes res; -} CRangeEnc; - -typedef struct -{ - CLzmaProb *litProbs; - - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb isRep[kNumStates]; - CLzmaProb isRepG0[kNumStates]; - CLzmaProb isRepG1[kNumStates]; - CLzmaProb isRepG2[kNumStates]; - CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; - - CLenPriceEnc lenEnc; - CLenPriceEnc repLenEnc; - - UInt32 reps[LZMA_NUM_REPS]; - UInt32 state; -} CSaveState; - -typedef struct -{ - IMatchFinder matchFinder; - void *matchFinderObj; - - CMatchFinder matchFinderBase; - - UInt32 optimumEndIndex; - UInt32 optimumCurrentIndex; - - UInt32 longestMatchLength; - UInt32 numPairs; - UInt32 numAvail; - COptimal opt[kNumOpts]; - - #ifndef LZMA_LOG_BSR - Byte g_FastPos[1 << kNumLogBits]; - #endif - - UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; - UInt32 numFastBytes; - UInt32 additionalOffset; - UInt32 reps[LZMA_NUM_REPS]; - UInt32 state; - - UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; - UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; - UInt32 alignPrices[kAlignTableSize]; - UInt32 alignPriceCount; - - UInt32 distTableSize; - - unsigned lc, lp, pb; - unsigned lpMask, pbMask; - - CLzmaProb *litProbs; - - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb isRep[kNumStates]; - CLzmaProb isRepG0[kNumStates]; - CLzmaProb isRepG1[kNumStates]; - CLzmaProb isRepG2[kNumStates]; - CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; - - CLenPriceEnc lenEnc; - CLenPriceEnc repLenEnc; - - unsigned lclp; - - Bool fastMode; - - CRangeEnc rc; - - UInt64 nowPos64; - UInt32 matchPriceCount; - Bool finished; - Bool multiThread; - - SRes result; - UInt32 dictSize; - UInt32 matchFinderCycles; - - int needInit; - - CSaveState saveState; -} CLzmaEnc; - -void LzmaEnc_SaveState(CLzmaEncHandle pp) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - CSaveState *dest = &p->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; - dest->state = p->state; - - for (i = 0; i < kNumStates; i++) - { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); - memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb)); -} - -void LzmaEnc_RestoreState(CLzmaEncHandle pp) -{ - CLzmaEnc *dest = (CLzmaEnc *)pp; - const CSaveState *p = &dest->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; - dest->state = p->state; - - for (i = 0; i < kNumStates; i++) - { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); - memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb)); -} - -void LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) - { - CLzmaEnc *p = (CLzmaEnc *)pp; - CLzmaEncProps props = *props2; - LzmaEncProps_Normalize(&props); - - if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX || - props.dictSize > (1 << kDicLogSizeMaxCompress) || props.dictSize > (1 << 30)) - internal_error( "invalid argument to encoder library" ); - p->dictSize = props.dictSize; - p->matchFinderCycles = props.mc; - { - unsigned fb = props.fb; - if (fb < 5) - fb = 5; - if (fb > LZMA_MATCH_LEN_MAX) - fb = LZMA_MATCH_LEN_MAX; - p->numFastBytes = fb; - } - p->lc = props.lc; - p->lp = props.lp; - p->pb = props.pb; - p->fastMode = (props.algo == 0); - p->matchFinderBase.btMode = props.btMode; - { - UInt32 numHashBytes = 4; - if (props.btMode) - { - if (props.numHashBytes < 2) - numHashBytes = 2; - else if (props.numHashBytes < 4) - numHashBytes = props.numHashBytes; - } - p->matchFinderBase.numHashBytes = numHashBytes; - } - p->matchFinderBase.cutValue = props.mc; - } - -static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; -static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; -static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; -static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; - -#define IsCharState(s) ((s) < 7) - -#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1) - -#define kInfinityPrice (1 << 30) - -static void RangeEnc_Construct(CRangeEnc *p) -{ - p->outStream = 0; - p->bufBase = 0; -} - -#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) - -#define RC_BUF_SIZE (1 << 16) -static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc) -{ - if (p->bufBase == 0) - { - p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE); - if (p->bufBase == 0) - return 0; - p->bufLim = p->bufBase + RC_BUF_SIZE; - } - return 1; -} - -static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc) -{ - alloc->Free(alloc, p->bufBase); - p->bufBase = 0; -} - -static void RangeEnc_Init(CRangeEnc *p) -{ - /* Stream.Init(); */ - p->low = 0; - p->range = 0xFFFFFFFF; - p->cacheSize = 1; - p->cache = 0; - - p->buf = p->bufBase; - - p->processed = 0; - p->res = SZ_OK; -} - -static void RangeEnc_FlushStream(CRangeEnc *p) -{ - size_t num; - if (p->res != SZ_OK) - return; - num = p->buf - p->bufBase; - if (num != p->outStream->Write(p->outStream, p->bufBase, num)) - p->res = SZ_ERROR_WRITE; - p->processed += num; - p->buf = p->bufBase; -} - -static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) -{ - if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0) - { - Byte temp = p->cache; - do - { - Byte *buf = p->buf; - *buf++ = (Byte)(temp + (Byte)(p->low >> 32)); - p->buf = buf; - if (buf == p->bufLim) - RangeEnc_FlushStream(p); - temp = 0xFF; - } - while (--p->cacheSize != 0); - p->cache = (Byte)((UInt32)p->low >> 24); - } - p->cacheSize++; - p->low = (UInt32)p->low << 8; -} - -static void RangeEnc_FlushData(CRangeEnc *p) -{ - int i; - for (i = 0; i < 5; i++) - RangeEnc_ShiftLow(p); -} - -static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, int numBits) -{ - do - { - p->range >>= 1; - p->low += p->range & (0 - ((value >> --numBits) & 1)); - if (p->range < kTopValue) - { - p->range <<= 8; - RangeEnc_ShiftLow(p); - } - } - while (numBits != 0); -} - -static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, UInt32 symbol) -{ - UInt32 ttt = *prob; - UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt; - if (symbol == 0) - { - p->range = newBound; - ttt += (kBitModelTotal - ttt) >> kNumMoveBits; - } - else - { - p->low += newBound; - p->range -= newBound; - ttt -= ttt >> kNumMoveBits; - } - *prob = (CLzmaProb)ttt; - if (p->range < kTopValue) - { - p->range <<= 8; - RangeEnc_ShiftLow(p); - } -} - -static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol) -{ - symbol |= 0x100; - do - { - RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); - symbol <<= 1; - } - while (symbol < 0x10000); -} - -static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte) -{ - UInt32 offs = 0x100; - symbol |= 0x100; - do - { - matchByte <<= 1; - RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); - symbol <<= 1; - offs &= ~(matchByte ^ symbol); - } - while (symbol < 0x10000); -} - -void LzmaEnc_InitPriceTables(UInt32 *ProbPrices) -{ - UInt32 i; - for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) - { - const int kCyclesBits = kNumBitPriceShiftBits; - UInt32 w = i; - UInt32 bitCount = 0; - int j; - for (j = 0; j < kCyclesBits; j++) - { - w = w * w; - bitCount <<= 1; - while (w >= ((UInt32)1 << 16)) - { - w >>= 1; - bitCount++; - } - } - ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); - } -} - - -#define GET_PRICE(prob, symbol) \ - p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; - -#define GET_PRICEa(prob, symbol) \ - ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; - -#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] - -#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] - -static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 *ProbPrices) -{ - UInt32 price = 0; - symbol |= 0x100; - do - { - price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); - symbol <<= 1; - } - while (symbol < 0x10000); - return price; -} - -static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, UInt32 *ProbPrices) -{ - UInt32 price = 0; - UInt32 offs = 0x100; - symbol |= 0x100; - do - { - matchByte <<= 1; - price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1); - symbol <<= 1; - offs &= ~(matchByte ^ symbol); - } - while (symbol < 0x10000); - return price; -} - - -static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol) -{ - UInt32 m = 1; - int i; - for (i = numBitLevels; i != 0;) - { - UInt32 bit; - i--; - bit = (symbol >> i) & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); - m = (m << 1) | bit; - } -} - -static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol) -{ - UInt32 m = 1; - int i; - for (i = 0; i < numBitLevels; i++) - { - UInt32 bit = symbol & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); - m = (m << 1) | bit; - symbol >>= 1; - } -} - -static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices) -{ - UInt32 price = 0; - symbol |= (1 << numBitLevels); - while (symbol != 1) - { - price += GET_PRICEa(probs[symbol >> 1], symbol & 1); - symbol >>= 1; - } - return price; -} - -static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices) -{ - UInt32 price = 0; - UInt32 m = 1; - int i; - for (i = numBitLevels; i != 0; i--) - { - UInt32 bit = symbol & 1; - symbol >>= 1; - price += GET_PRICEa(probs[m], bit); - m = (m << 1) | bit; - } - return price; -} - - -static void LenEnc_Init(CLenEnc *p) -{ - unsigned i; - p->choice = p->choice2 = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) - p->low[i] = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) - p->mid[i] = kProbInitValue; - for (i = 0; i < kLenNumHighSymbols; i++) - p->high[i] = kProbInitValue; -} - -static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState) -{ - if (symbol < kLenNumLowSymbols) - { - RangeEnc_EncodeBit(rc, &p->choice, 0); - RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); - } - else - { - RangeEnc_EncodeBit(rc, &p->choice, 1); - if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) - { - RangeEnc_EncodeBit(rc, &p->choice2, 0); - RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); - } - else - { - RangeEnc_EncodeBit(rc, &p->choice2, 1); - RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); - } - } -} - -static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, UInt32 *ProbPrices) -{ - UInt32 a0 = GET_PRICE_0a(p->choice); - UInt32 a1 = GET_PRICE_1a(p->choice); - UInt32 b0 = a1 + GET_PRICE_0a(p->choice2); - UInt32 b1 = a1 + GET_PRICE_1a(p->choice2); - UInt32 i = 0; - for (i = 0; i < kLenNumLowSymbols; i++) - { - if (i >= numSymbols) - return; - prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); - } - for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) - { - if (i >= numSymbols) - return; - prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); - } - for (; i < numSymbols; i++) - prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); -} - -static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, UInt32 posState, UInt32 *ProbPrices) -{ - LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); - p->counters[posState] = p->tableSize; -} - -static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, UInt32 *ProbPrices) -{ - UInt32 posState; - for (posState = 0; posState < numPosStates; posState++) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); -} - -static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32 *ProbPrices) -{ - LenEnc_Encode(&p->p, rc, symbol, posState); - if (updatePrice) - if (--p->counters[posState] == 0) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); -} - - - - -static void MovePos(CLzmaEnc *p, UInt32 num) -{ - #ifdef SHOW_STAT - ttt += num; - printf("\n MovePos %d", num); - #endif - if (num != 0) - { - p->additionalOffset += num; - p->matchFinder.Skip(p->matchFinderObj, num); - } -} - -static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes) -{ - UInt32 lenRes = 0, numPairs; - p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); - numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); - #ifdef SHOW_STAT - printf("\n i = %d numPairs = %d ", ttt, numPairs / 2); - ttt++; - { - UInt32 i; - for (i = 0; i < numPairs; i += 2) - printf("%2d %6d | ", p->matches[i], p->matches[i + 1]); - } - #endif - if (numPairs > 0) - { - lenRes = p->matches[numPairs - 2]; - if (lenRes == p->numFastBytes) - { - const Byte *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - UInt32 distance = p->matches[numPairs - 1] + 1; - UInt32 numAvail = p->numAvail; - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - { - const Byte *pby2 = pby - distance; - for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++) ; - } - } - } - p->additionalOffset++; - *numDistancePairsRes = numPairs; - return lenRes; -} - - -#define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); (p)->prev1IsChar = False; -#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False; -#define IsShortRep(p) ((p)->backPrev == 0) - -static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState) -{ - return - GET_PRICE_0(p->isRepG0[state]) + - GET_PRICE_0(p->isRep0Long[state][posState]); -} - -static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState) -{ - UInt32 price; - if (repIndex == 0) - { - price = GET_PRICE_0(p->isRepG0[state]); - price += GET_PRICE_1(p->isRep0Long[state][posState]); - } - else - { - price = GET_PRICE_1(p->isRepG0[state]); - if (repIndex == 1) - price += GET_PRICE_0(p->isRepG1[state]); - else - { - price += GET_PRICE_1(p->isRepG1[state]); - price += GET_PRICE(p->isRepG2[state], repIndex - 2); - } - } - return price; -} - -static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState) -{ - return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] + - GetPureRepPrice(p, repIndex, state, posState); -} - -static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur) -{ - UInt32 posMem = p->opt[cur].posPrev; - UInt32 backMem = p->opt[cur].backPrev; - p->optimumEndIndex = cur; - do - { - if (p->opt[cur].prev1IsChar) - { - MakeAsChar(&p->opt[posMem]) - p->opt[posMem].posPrev = posMem - 1; - if (p->opt[cur].prev2) - { - p->opt[posMem - 1].prev1IsChar = False; - p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2; - p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2; - } - } - { - UInt32 posPrev = posMem; - UInt32 backCur = backMem; - - backMem = p->opt[posPrev].backPrev; - posMem = p->opt[posPrev].posPrev; - - p->opt[posPrev].backPrev = backCur; - p->opt[posPrev].posPrev = cur; - cur = posPrev; - } - } - while (cur != 0); - *backRes = p->opt[0].backPrev; - p->optimumCurrentIndex = p->opt[0].posPrev; - return p->optimumCurrentIndex; -} - -#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300) - -static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) -{ - UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur; - UInt32 matchPrice, repMatchPrice, normalMatchPrice; - UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS]; - UInt32 *matches; - const Byte *data; - Byte curByte, matchByte; - if (p->optimumEndIndex != p->optimumCurrentIndex) - { - const COptimal *opt = &p->opt[p->optimumCurrentIndex]; - UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex; - *backRes = opt->backPrev; - p->optimumCurrentIndex = opt->posPrev; - return lenRes; - } - p->optimumCurrentIndex = p->optimumEndIndex = 0; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else - { - mainLen = p->longestMatchLength; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - if (numAvail < 2) - { - *backRes = (UInt32)(-1); - return 1; - } - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repMaxIndex = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - { - UInt32 lenTest; - const Byte *data2; - reps[i] = p->reps[i]; - data2 = data - (reps[i] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) - { - repLens[i] = 0; - continue; - } - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) ; - repLens[i] = lenTest; - if (lenTest > repLens[repMaxIndex]) - repMaxIndex = i; - } - if (repLens[repMaxIndex] >= p->numFastBytes) - { - UInt32 lenRes; - *backRes = repMaxIndex; - lenRes = repLens[repMaxIndex]; - MovePos(p, lenRes - 1); - return lenRes; - } - - matches = p->matches; - if (mainLen >= p->numFastBytes) - { - *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); - return mainLen; - } - curByte = *data; - matchByte = *(data - (reps[0] + 1)); - - if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) - { - *backRes = (UInt32)-1; - return 1; - } - - p->opt[0].state = (CState)p->state; - - posState = (position & p->pbMask); - - { - const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); - p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + - (!IsCharState(p->state) ? - LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : - LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } - - MakeAsChar(&p->opt[1]); - - matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); - - if (matchByte == curByte) - { - UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); - if (shortRepPrice < p->opt[1].price) - { - p->opt[1].price = shortRepPrice; - MakeAsShortRep(&p->opt[1]); - } - } - lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]); - - if (lenEnd < 2) - { - *backRes = p->opt[1].backPrev; - return 1; - } - - p->opt[1].posPrev = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - p->opt[0].backs[i] = reps[i]; - - len = lenEnd; - do - p->opt[len--].price = kInfinityPrice; - while (len >= 2); - - for (i = 0; i < LZMA_NUM_REPS; i++) - { - UInt32 repLen = repLens[i]; - UInt32 price; - if (repLen < 2) - continue; - price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); - do - { - UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2]; - COptimal *opt = &p->opt[repLen]; - if (curAndLenPrice < opt->price) - { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = i; - opt->prev1IsChar = False; - } - } - while (--repLen >= 2); - } - - normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); - - len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2); - if (len <= mainLen) - { - UInt32 offs = 0; - while (len > matches[offs]) - offs += 2; - for (; ; len++) - { - COptimal *opt; - UInt32 distance = matches[offs + 1]; - - UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN]; - UInt32 lenToPosState = GetLenToPosState(len); - if (distance < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][distance]; - else - { - UInt32 slot; - GetPosSlot2(distance, slot); - curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot]; - } - opt = &p->opt[len]; - if (curAndLenPrice < opt->price) - { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = distance + LZMA_NUM_REPS; - opt->prev1IsChar = False; - } - if (len == matches[offs]) - { - offs += 2; - if (offs == numPairs) - break; - } - } - } - - cur = 0; - - #ifdef SHOW_STAT2 - if (position >= 0) - { - unsigned i; - printf("\n pos = %4X", position); - for (i = cur; i <= lenEnd; i++) - printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price); - } - #endif - - for (;;) - { - UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen; - UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice; - Bool nextIsChar; - Byte curByte, matchByte; - const Byte *data; - COptimal *curOpt; - COptimal *nextOpt; - - cur++; - if (cur == lenEnd) - return Backward(p, backRes, cur); - - newLen = ReadMatchDistances(p, &numPairs); - if (newLen >= p->numFastBytes) - { - p->numPairs = numPairs; - p->longestMatchLength = newLen; - return Backward(p, backRes, cur); - } - position++; - curOpt = &p->opt[cur]; - posPrev = curOpt->posPrev; - if (curOpt->prev1IsChar) - { - posPrev--; - if (curOpt->prev2) - { - state = p->opt[curOpt->posPrev2].state; - if (curOpt->backPrev2 < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } - else - state = p->opt[posPrev].state; - state = kLiteralNextStates[state]; - } - else - state = p->opt[posPrev].state; - if (posPrev == cur - 1) - { - if (IsShortRep(curOpt)) - state = kShortRepNextStates[state]; - else - state = kLiteralNextStates[state]; - } - else - { - UInt32 pos; - const COptimal *prevOpt; - if (curOpt->prev1IsChar && curOpt->prev2) - { - posPrev = curOpt->posPrev2; - pos = curOpt->backPrev2; - state = kRepNextStates[state]; - } - else - { - pos = curOpt->backPrev; - if (pos < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } - prevOpt = &p->opt[posPrev]; - if (pos < LZMA_NUM_REPS) - { - UInt32 i; - reps[0] = prevOpt->backs[pos]; - for (i = 1; i <= pos; i++) - reps[i] = prevOpt->backs[i - 1]; - for (; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i]; - } - else - { - UInt32 i; - reps[0] = (pos - LZMA_NUM_REPS); - for (i = 1; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i - 1]; - } - } - curOpt->state = (CState)state; - - curOpt->backs[0] = reps[0]; - curOpt->backs[1] = reps[1]; - curOpt->backs[2] = reps[2]; - curOpt->backs[3] = reps[3]; - - curPrice = curOpt->price; - nextIsChar = False; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - curByte = *data; - matchByte = *(data - (reps[0] + 1)); - - posState = (position & p->pbMask); - - curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); - { - const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); - curAnd1Price += - (!IsCharState(state) ? - LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : - LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } - - nextOpt = &p->opt[cur + 1]; - - if (curAnd1Price < nextOpt->price) - { - nextOpt->price = curAnd1Price; - nextOpt->posPrev = cur; - MakeAsChar(nextOpt); - nextIsChar = True; - } - - matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); - - if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) - { - UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); - if (shortRepPrice <= nextOpt->price) - { - nextOpt->price = shortRepPrice; - nextOpt->posPrev = cur; - MakeAsShortRep(nextOpt); - nextIsChar = True; - } - } - numAvailFull = p->numAvail; - { - UInt32 temp = kNumOpts - 1 - cur; - if (temp < numAvailFull) - numAvailFull = temp; - } - - if (numAvailFull < 2) - continue; - numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); - - if (!nextIsChar && matchByte != curByte) /* speed optimization */ - { - /* try Literal + rep0 */ - UInt32 temp; - UInt32 lenTest2; - const Byte *data2 = data - (reps[0] + 1); - UInt32 limit = p->numFastBytes + 1; - if (limit > numAvailFull) - limit = numAvailFull; - - for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++) ; - lenTest2 = temp - 1; - if (lenTest2 >= 2) - { - UInt32 state2 = kLiteralNextStates[state]; - UInt32 posStateNext = (position + 1) & p->pbMask; - UInt32 nextRepMatchPrice = curAnd1Price + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - /* for (; lenTest2 >= 2; lenTest2--) */ - { - UInt32 curAndLenPrice; - COptimal *opt; - UInt32 offset = cur + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) - { - opt->price = curAndLenPrice; - opt->posPrev = cur + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = False; - } - } - } - } - - startLen = 2; /* speed optimization */ - { - UInt32 repIndex; - for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) - { - UInt32 lenTest; - UInt32 lenTestTemp; - UInt32 price; - const Byte *data2 = data - (reps[repIndex] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) ; - while (lenEnd < cur + lenTest) - p->opt[++lenEnd].price = kInfinityPrice; - lenTestTemp = lenTest; - price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); - do - { - UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2]; - COptimal *opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) - { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = repIndex; - opt->prev1IsChar = False; - } - } - while (--lenTest >= 2); - lenTest = lenTestTemp; - - if (repIndex == 0) - startLen = lenTest + 1; - - /* if (_maxMode) */ - { - UInt32 lenTest2 = lenTest + 1; - UInt32 limit = lenTest2 + p->numFastBytes; - UInt32 nextRepMatchPrice; - if (limit > numAvailFull) - limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) ; - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) - { - UInt32 state2 = kRepNextStates[state]; - UInt32 posStateNext = (position + lenTest) & p->pbMask; - UInt32 curAndLenCharPrice = - price + p->repLenEnc.prices[posState][lenTest - 2] + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (position + lenTest + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - - /* for (; lenTest2 >= 2; lenTest2--) */ - { - UInt32 curAndLenPrice; - COptimal *opt; - UInt32 offset = cur + lenTest + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) - { - opt->price = curAndLenPrice; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = repIndex; - } - } - } - } - } - } - /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */ - if (newLen > numAvail) - { - newLen = numAvail; - for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2) ; - matches[numPairs] = newLen; - numPairs += 2; - } - if (newLen >= startLen) - { - UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); - UInt32 offs, curBack, posSlot; - UInt32 lenTest; - while (lenEnd < cur + newLen) - p->opt[++lenEnd].price = kInfinityPrice; - - offs = 0; - while (startLen > matches[offs]) - offs += 2; - curBack = matches[offs + 1]; - GetPosSlot2(curBack, posSlot); - for (lenTest = /*2*/ startLen; ; lenTest++) - { - UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN]; - UInt32 lenToPosState = GetLenToPosState(lenTest); - COptimal *opt; - if (curBack < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; - else - curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask]; - - opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) - { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = curBack + LZMA_NUM_REPS; - opt->prev1IsChar = False; - } - - if (/*_maxMode && */lenTest == matches[offs]) - { - /* Try Match + Literal + Rep0 */ - const Byte *data2 = data - (curBack + 1); - UInt32 lenTest2 = lenTest + 1; - UInt32 limit = lenTest2 + p->numFastBytes; - UInt32 nextRepMatchPrice; - if (limit > numAvailFull) - limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) ; - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) - { - UInt32 state2 = kMatchNextStates[state]; - UInt32 posStateNext = (position + lenTest) & p->pbMask; - UInt32 curAndLenCharPrice = curAndLenPrice + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (posStateNext + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - - /* for (; lenTest2 >= 2; lenTest2--) */ - { - UInt32 offset = cur + lenTest + 1 + lenTest2; - UInt32 curAndLenPrice; - COptimal *opt; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) - { - opt->price = curAndLenPrice; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = curBack + LZMA_NUM_REPS; - } - } - } - offs += 2; - if (offs == numPairs) - break; - curBack = matches[offs + 1]; - if (curBack >= kNumFullDistances) - GetPosSlot2(curBack, posSlot); - } - } - } - } -} - -#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) - -static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes) -{ - UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i; - const Byte *data; - const UInt32 *matches; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else - { - mainLen = p->longestMatchLength; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - *backRes = (UInt32)-1; - if (numAvail < 2) - return 1; - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - - repLen = repIndex = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - { - UInt32 len; - const Byte *data2 = data - (p->reps[i] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - for (len = 2; len < numAvail && data[len] == data2[len]; len++) ; - if (len >= p->numFastBytes) - { - *backRes = i; - MovePos(p, len - 1); - return len; - } - if (len > repLen) - { - repIndex = i; - repLen = len; - } - } - - matches = p->matches; - if (mainLen >= p->numFastBytes) - { - *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); - return mainLen; - } - - mainDist = 0; /* for GCC */ - if (mainLen >= 2) - { - mainDist = matches[numPairs - 1]; - while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1) - { - if (!ChangePair(matches[numPairs - 3], mainDist)) - break; - numPairs -= 2; - mainLen = matches[numPairs - 2]; - mainDist = matches[numPairs - 1]; - } - if (mainLen == 2 && mainDist >= 0x80) - mainLen = 1; - } - - if (repLen >= 2 && ( - (repLen + 1 >= mainLen) || - (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || - (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) - { - *backRes = repIndex; - MovePos(p, repLen - 1); - return repLen; - } - - if (mainLen < 2 || numAvail <= 2) - return 1; - - p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); - if (p->longestMatchLength >= 2) - { - UInt32 newDistance = matches[p->numPairs - 1]; - if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || - (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) || - (p->longestMatchLength > mainLen + 1) || - (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist))) - return 1; - } - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - for (i = 0; i < LZMA_NUM_REPS; i++) - { - UInt32 len, limit; - const Byte *data2 = data - (p->reps[i] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - limit = mainLen - 1; - for (len = 2; len < limit && data[len] == data2[len]; len++) ; - if (len >= limit) - return 1; - } - *backRes = mainDist + LZMA_NUM_REPS; - MovePos(p, mainLen - 2); - return mainLen; -} - -static void LZe_full_flush(CLzmaEnc *p, UInt32 posState) - { - const UInt32 len = LZMA_MATCH_LEN_MIN; - File_trailer trailer; - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); - p->state = kMatchNextStates[p->state]; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); - RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); - RangeEnc_FlushData(&p->rc); - RangeEnc_FlushStream(&p->rc); - Ft_set_data_crc( trailer, p->matchFinderBase.crc ^ 0xFFFFFFFFU ); - Ft_set_data_size( trailer, p->nowPos64 ); - Ft_set_member_size( trailer, p->rc.processed + Fh_size + Ft_size ); - if( p->rc.outStream->Write( p->rc.outStream, trailer, Ft_size ) != Ft_size ) - p->rc.res = SZ_ERROR_WRITE; - if( verbosity >= 1 ) - { - long long in_size = p->nowPos64; - long long out_size = p->rc.processed + Fh_size + Ft_size; - if( in_size <= 0 || out_size <= 0 ) - fprintf( stderr, "no data compressed.\n" ); - else - fprintf( stderr, "%6.3f:1, %6.3f bits/byte, " - "%5.2f%% saved, %lld in, %lld out.\n", - (double)in_size / out_size, - ( 8.0 * out_size ) / in_size, - 100.0 * ( 1.0 - ( (double)out_size / in_size ) ), - in_size, out_size ); - } - } - -static SRes CheckErrors(CLzmaEnc *p) -{ - if (p->result != SZ_OK) - return p->result; - if (p->rc.res != SZ_OK) - p->result = SZ_ERROR_WRITE; - if (p->matchFinderBase.result != SZ_OK) - p->result = SZ_ERROR_READ; - if (p->result != SZ_OK) - p->finished = True; - return p->result; -} - -static SRes Flush(CLzmaEnc *p, UInt32 nowPos) -{ - /* ReleaseMFStream(); */ - p->finished = True; - LZe_full_flush(p, nowPos & p->pbMask); - return CheckErrors(p); -} - -static void FillAlignPrices(CLzmaEnc *p) -{ - UInt32 i; - for (i = 0; i < kAlignTableSize; i++) - p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); - p->alignPriceCount = 0; -} - -static void FillDistancesPrices(CLzmaEnc *p) -{ - UInt32 tempPrices[kNumFullDistances]; - UInt32 i, lenToPosState; - for (i = kStartPosModelIndex; i < kNumFullDistances; i++) - { - UInt32 posSlot = GetPosSlot1(i); - UInt32 footerBits = ((posSlot >> 1) - 1); - UInt32 base = ((2 | (posSlot & 1)) << footerBits); - tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices); - } - - for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) - { - UInt32 posSlot; - const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState]; - UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState]; - for (posSlot = 0; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); - for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); - - { - UInt32 *distancesPrices = p->distancesPrices[lenToPosState]; - UInt32 i; - for (i = 0; i < kStartPosModelIndex; i++) - distancesPrices[i] = posSlotPrices[i]; - for (; i < kNumFullDistances; i++) - distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; - } - } - p->matchPriceCount = 0; -} - -void LzmaEnc_Construct(CLzmaEnc *p) -{ - RangeEnc_Construct(&p->rc); - MatchFinder_Construct(&p->matchFinderBase); - - { - CLzmaEncProps props; - LzmaEncProps_Init(&props); - LzmaEnc_SetProps(p, &props); - } - - #ifndef LZMA_LOG_BSR - LzmaEnc_FastPosInit(p->g_FastPos); - #endif - - LzmaEnc_InitPriceTables(p->ProbPrices); - p->litProbs = 0; - p->saveState.litProbs = 0; -} - -CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc) -{ - void *p; - p = alloc->Alloc(alloc, sizeof(CLzmaEnc)); - if (p != 0) - LzmaEnc_Construct((CLzmaEnc *)p); - return p; -} - -void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc) -{ - alloc->Free(alloc, p->litProbs); - alloc->Free(alloc, p->saveState.litProbs); - p->litProbs = 0; - p->saveState.litProbs = 0; -} - -void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig) -{ - MatchFinder_Free(&p->matchFinderBase, allocBig); - LzmaEnc_FreeLits(p, alloc); - RangeEnc_Free(&p->rc, alloc); -} - -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig) -{ - LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); - alloc->Free(alloc, p); -} - -static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize) -{ - UInt32 nowPos32, startPos32; - if (p->needInit) - { - p->matchFinder.Init(p->matchFinderObj); - p->needInit = 0; - } - - if (p->finished) - return p->result; - RINOK(CheckErrors(p)); - - nowPos32 = (UInt32)p->nowPos64; - startPos32 = nowPos32; - - if (p->nowPos64 == 0) - { - UInt32 numPairs; - Byte curByte; - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - return Flush(p, nowPos32); - ReadMatchDistances(p, &numPairs); - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); - p->state = kLiteralNextStates[p->state]; - curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset); - LitEnc_Encode(&p->rc, p->litProbs, curByte); - p->additionalOffset--; - nowPos32++; - } - - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) - for (;;) - { - UInt32 pos, len, posState; - - if (p->fastMode) - len = GetOptimumFast(p, &pos); - else - len = GetOptimum(p, nowPos32, &pos); - - #ifdef SHOW_STAT2 - printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos); - #endif - - posState = nowPos32 & p->pbMask; - if (len == 1 && pos == (UInt32)-1) - { - Byte curByte; - CLzmaProb *probs; - const Byte *data; - - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; - curByte = *data; - probs = LIT_PROBS(nowPos32, *(data - 1)); - if (IsCharState(p->state)) - LitEnc_Encode(&p->rc, probs, curByte); - else - LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1)); - p->state = kLiteralNextStates[p->state]; - } - else - { - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - if (pos < LZMA_NUM_REPS) - { - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); - if (pos == 0) - { - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); - RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1)); - } - else - { - UInt32 distance = p->reps[pos]; - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); - if (pos == 1) - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); - else - { - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); - if (pos == 3) - p->reps[3] = p->reps[2]; - p->reps[2] = p->reps[1]; - } - p->reps[1] = p->reps[0]; - p->reps[0] = distance; - } - if (len == 1) - p->state = kShortRepNextStates[p->state]; - else - { - LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - p->state = kRepNextStates[p->state]; - } - } - else - { - UInt32 posSlot; - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); - p->state = kMatchNextStates[p->state]; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - pos -= LZMA_NUM_REPS; - GetPosSlot(pos, posSlot); - RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot); - - if (posSlot >= kStartPosModelIndex) - { - UInt32 footerBits = ((posSlot >> 1) - 1); - UInt32 base = ((2 | (posSlot & 1)) << footerBits); - UInt32 posReduced = pos - base; - - if (posSlot < kEndPosModelIndex) - RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced); - else - { - RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); - p->alignPriceCount++; - } - } - p->reps[3] = p->reps[2]; - p->reps[2] = p->reps[1]; - p->reps[1] = p->reps[0]; - p->reps[0] = pos; - p->matchPriceCount++; - } - } - p->additionalOffset -= len; - nowPos32 += len; - if (p->additionalOffset == 0) - { - UInt32 processed; - if (!p->fastMode) - { - if (p->matchPriceCount >= (1 << 7)) - FillDistancesPrices(p); - if (p->alignPriceCount >= kAlignTableSize) - FillAlignPrices(p); - } - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - break; - processed = nowPos32 - startPos32; - if (useLimits) - { - if (processed + kNumOpts + 300 >= maxUnpackSize || - RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize) - break; - } - else if (processed >= (1 << 15)) - { - p->nowPos64 += nowPos32 - startPos32; - return CheckErrors(p); - } - } - } - p->nowPos64 += nowPos32 - startPos32; - return Flush(p, nowPos32); -} - -#define kBigHashDicLimit ((UInt32)1 << 24) - -static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) -{ - UInt32 beforeSize = kNumOpts; - Bool btMode; - if (!RangeEnc_Alloc(&p->rc, alloc)) - return SZ_ERROR_MEM; - btMode = (p->matchFinderBase.btMode != 0); - - { - unsigned lclp = p->lc + p->lp; - if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp) - { - LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); - if (p->litProbs == 0 || p->saveState.litProbs == 0) - { - LzmaEnc_FreeLits(p, alloc); - return SZ_ERROR_MEM; - } - p->lclp = lclp; - } - } - - p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit); - - if (beforeSize + p->dictSize < keepWindowSize) - beforeSize = keepWindowSize - p->dictSize; - - { - if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) - return SZ_ERROR_MEM; - p->matchFinderObj = &p->matchFinderBase; - MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); - } - return SZ_OK; -} - -void LzmaEnc_Init(CLzmaEnc *p) -{ - UInt32 i; - p->state = 0; - for (i = 0 ; i < LZMA_NUM_REPS; i++) - p->reps[i] = 0; - - RangeEnc_Init(&p->rc); - - - for (i = 0; i < kNumStates; i++) - { - UInt32 j; - for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) - { - p->isMatch[i][j] = kProbInitValue; - p->isRep0Long[i][j] = kProbInitValue; - } - p->isRep[i] = kProbInitValue; - p->isRepG0[i] = kProbInitValue; - p->isRepG1[i] = kProbInitValue; - p->isRepG2[i] = kProbInitValue; - } - - { - UInt32 num = 0x300 << (p->lp + p->lc); - for (i = 0; i < num; i++) - p->litProbs[i] = kProbInitValue; - } - - { - for (i = 0; i < kNumLenToPosStates; i++) - { - CLzmaProb *probs = p->posSlotEncoder[i]; - UInt32 j; - for (j = 0; j < (1 << kNumPosSlotBits); j++) - probs[j] = kProbInitValue; - } - } - { - for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) - p->posEncoders[i] = kProbInitValue; - } - - LenEnc_Init(&p->lenEnc.p); - LenEnc_Init(&p->repLenEnc.p); - - for (i = 0; i < (1 << kNumAlignBits); i++) - p->posAlignEncoder[i] = kProbInitValue; - - p->optimumEndIndex = 0; - p->optimumCurrentIndex = 0; - p->additionalOffset = 0; - - p->pbMask = (1 << p->pb) - 1; - p->lpMask = (1 << p->lp) - 1; -} - -void LzmaEnc_InitPrices(CLzmaEnc *p) -{ - if (!p->fastMode) - { - FillDistancesPrices(p); - FillAlignPrices(p); - } - - p->lenEnc.tableSize = - p->repLenEnc.tableSize = - p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices); - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices); -} - -static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) -{ - UInt32 i; - for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++) - if (p->dictSize <= ((UInt32)1 << i)) - break; - p->distTableSize = i * 2; - - p->finished = False; - p->result = SZ_OK; - RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); - LzmaEnc_Init(p); - LzmaEnc_InitPrices(p); - p->nowPos64 = 0; - return SZ_OK; -} - -static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, - ISzAlloc *alloc, ISzAlloc *allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; - p->needInit = 1; - p->rc.outStream = outStream; - return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); -} - -static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) -{ - p->matchFinderBase.directInput = 1; - p->matchFinderBase.bufferBase = (Byte *)src; - p->matchFinderBase.directInputRem = srcLen; -} - -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, - UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - LzmaEnc_SetInputBuf(p, src, srcLen); - p->needInit = 1; - - return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); -} - -void LzmaEnc_Finish(CLzmaEncHandle pp) -{ - pp = pp; -} - - -UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) -{ - const CLzmaEnc *p = (CLzmaEnc *)pp; - return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); -} - -const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) -{ - const CLzmaEnc *p = (CLzmaEnc *)pp; - return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; -} - -static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) -{ - SRes res = SZ_OK; - - for (;;) - { - res = LzmaEnc_CodeOneBlock(p, False, 0, 0); - if (res != SZ_OK || p->finished != 0) - break; - if (progress != 0) - { - res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); - if (res != SZ_OK) - { - res = SZ_ERROR_PROGRESS; - break; - } - } - } - LzmaEnc_Finish(p); - return res; -} - -SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, - ISzAlloc *alloc, ISzAlloc *allocBig) -{ - RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); - return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); -} +/* LzmaEnc.c -- LZMA Encoder +2009-11-24 : Igor Pavlov : Public domain */ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include + +/* #define SHOW_STAT */ +/* #define SHOW_STAT2 */ + +#include "clzip.h" +#include "LzmaEnc.h" +#include "LzFind.h" + +#ifdef SHOW_STAT +static int ttt = 0; +#endif + +#define kNumTopBits 24 +#define kTopValue ((uint32_t)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 +#define kProbInitValue (kBitModelTotal >> 1) + +#define kNumMoveReducingBits 4 +#define kNumBitPriceShiftBits 4 + +#define kNumLogBits (9 + (int)sizeof(uint32_t) / 2) +#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) + + +static void LzmaEnc_FastPosInit(uint8_t *g_FastPos) +{ + int c = 2, slotFast; + g_FastPos[0] = 0; + g_FastPos[1] = 1; + + for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++) + { + uint32_t k = (1 << ((slotFast >> 1) - 1)); + uint32_t j; + for (j = 0; j < k; j++, c++) + g_FastPos[c] = (uint8_t)slotFast; + } +} + +#define BSR2_RET(pos, res) { uint32_t i = 6 + ((kNumLogBits - 1) & \ + (0 - (((((uint32_t)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ + res = p->g_FastPos[pos >> i] + (i * 2); } +/* +#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \ + p->g_FastPos[pos >> 6] + 12 : \ + p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } +*/ + +#define GetPosSlot1(pos) p->g_FastPos[pos] +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); } + + +#define LZMA_NUM_REPS 4 + +typedef struct +{ + uint32_t price; + + State state; + + uint32_t posPrev2; + uint32_t backPrev2; + + uint32_t posPrev; + uint32_t backPrev; + uint32_t backs[LZMA_NUM_REPS]; + + bool prev1IsChar; + bool prev2; +} COptimal; + +#define kNumOpts (1 << 12) + +#define kNumLenToPosStates 4 +#define kNumPosSlotBits 6 +#define kDicLogSizeMin 0 +#define kDicLogSizeMax 32 +#define kDistTableSizeMax (kDicLogSizeMax * 2) + + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) +#define kAlignMask (kAlignTableSize - 1) + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex) + +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define LZMA_LC_MAX 8 +#define LZMA_LP_MAX 4 +#define LZMA_PB_MAX 4 + +#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) + + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumMidBits 3 +#define kLenNumMidSymbols (1 << kLenNumMidBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) + +#define LZMA_MATCH_LEN_MIN 2 +#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) + +#define kNumStates 12 + +typedef struct +{ + int choice; + int choice2; + int low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits]; + int mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits]; + int high[kLenNumHighSymbols]; +} CLenEnc; + +typedef struct +{ + CLenEnc p; + uint32_t prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; + uint32_t tableSize; + uint32_t counters[LZMA_NUM_PB_STATES_MAX]; +} CLenPriceEnc; + +typedef struct +{ + uint64_t low; + uint64_t processed; + uint8_t *bufBase; + uint8_t *buf; + uint8_t *bufLim; + uint32_t range; + uint32_t cacheSize; + int outfd; + int res; + uint8_t cache; +} CRangeEnc; + + +typedef struct +{ + uint64_t nowPos64; + int *litProbs; + IMatchFinder matchFinder; + CMatchFinder matchFinderBase; + + uint32_t optimumEndIndex; + uint32_t optimumCurrentIndex; + + uint32_t longestMatchLength; + uint32_t numPairs; + uint32_t numAvail; + COptimal opt[kNumOpts]; + + uint8_t g_FastPos[1 << kNumLogBits]; + + uint32_t ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; + uint32_t matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; + uint32_t numFastBytes; + uint32_t additionalOffset; + uint32_t reps[LZMA_NUM_REPS]; + State state; + + uint32_t posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; + uint32_t distancesPrices[kNumLenToPosStates][kNumFullDistances]; + uint32_t alignPrices[kAlignTableSize]; + uint32_t alignPriceCount; + + uint32_t distTableSize; + + unsigned lc, lp, pb; + unsigned lpMask, pbMask; + + int isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + int isRep[kNumStates]; + int isRepG0[kNumStates]; + int isRepG1[kNumStates]; + int isRepG2[kNumStates]; + int isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + + int posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + int posEncoders[kNumFullDistances - kEndPosModelIndex]; + int posAlignEncoder[1 << kNumAlignBits]; + + CLenPriceEnc lenEnc; + CLenPriceEnc repLenEnc; + + CRangeEnc rc; + + uint32_t matchPriceCount; + + int result; + uint32_t dictSize; + bool fastMode; + bool finished; +} CLzmaEnc; + + +static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; +static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; +static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; +static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; + +#define IsCharState(s) ((s) < 7) + +#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1) + +#define kInfinityPrice (1 << 30) + +#define RC_BUF_SIZE (1 << 16) + + +static int RangeEnc_Init( CRangeEnc *p, const int outfd ) + { + p->low = 0; + p->processed = 0; + p->range = 0xFFFFFFFF; + p->cacheSize = 1; + p->outfd = outfd; + p->res = SZ_OK; + p->cache = 0; + p->buf = p->bufBase = (uint8_t *)malloc( RC_BUF_SIZE ); + if( !p->bufBase ) return 0; + p->bufLim = p->bufBase + RC_BUF_SIZE; + return 1; + } + + +static void RangeEnc_Free(CRangeEnc *p) +{ + free(p->bufBase); + p->bufBase = 0; +} + + +static void RangeEnc_FlushStream(CRangeEnc *p) +{ + int num; + if (p->res != SZ_OK) + return; + num = p->buf - p->bufBase; + if (num != writeblock(p->outfd, p->bufBase, num)) + p->res = SZ_ERROR_WRITE; + p->processed += num; + p->buf = p->bufBase; +} + +static void RangeEnc_ShiftLow(CRangeEnc *p) +{ + if ((uint32_t)p->low < (uint32_t)0xFF000000 || (int)(p->low >> 32) != 0) + { + uint8_t temp = p->cache; + do + { + uint8_t *buf = p->buf; + *buf++ = (uint8_t)(temp + (uint8_t)(p->low >> 32)); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + temp = 0xFF; + } + while (--p->cacheSize != 0); + p->cache = (uint8_t)((uint32_t)p->low >> 24); + } + p->cacheSize++; + p->low = (uint32_t)p->low << 8; +} + +static void RangeEnc_FlushData(CRangeEnc *p) +{ + int i; + for (i = 0; i < 5; i++) + RangeEnc_ShiftLow(p); +} + +static void RangeEnc_EncodeDirectBits(CRangeEnc *p, uint32_t value, int numBits) +{ + do + { + p->range >>= 1; + p->low += p->range & (0 - ((value >> --numBits) & 1)); + if (p->range < kTopValue) + { + p->range <<= 8; + RangeEnc_ShiftLow(p); + } + } + while (numBits != 0); +} + +static void RangeEnc_EncodeBit(CRangeEnc *p, int *prob, uint32_t symbol) +{ + uint32_t ttt = *prob; + uint32_t newBound = (p->range >> kNumBitModelTotalBits) * ttt; + if (symbol == 0) + { + p->range = newBound; + ttt += (kBitModelTotal - ttt) >> kNumMoveBits; + } + else + { + p->low += newBound; + p->range -= newBound; + ttt -= ttt >> kNumMoveBits; + } + *prob = (int)ttt; + if (p->range < kTopValue) + { + p->range <<= 8; + RangeEnc_ShiftLow(p); + } +} + +static void LitEnc_Encode(CRangeEnc *p, int *probs, uint32_t symbol) +{ + symbol |= 0x100; + do + { + RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); + symbol <<= 1; + } + while (symbol < 0x10000); +} + +static void LitEnc_EncodeMatched(CRangeEnc *p, int *probs, uint32_t symbol, uint32_t matchByte) +{ + uint32_t offs = 0x100; + symbol |= 0x100; + do + { + matchByte <<= 1; + RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); + symbol <<= 1; + offs &= ~(matchByte ^ symbol); + } + while (symbol < 0x10000); +} + +static void LzmaEnc_InitPriceTables(uint32_t *ProbPrices) +{ + uint32_t i; + for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) + { + const int kCyclesBits = kNumBitPriceShiftBits; + uint32_t w = i; + uint32_t bitCount = 0; + int j; + for (j = 0; j < kCyclesBits; j++) + { + w = w * w; + bitCount <<= 1; + while (w >= ((uint32_t)1 << 16)) + { + w >>= 1; + bitCount++; + } + } + ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + } +} + + +#define GET_PRICE(prob, symbol) \ + p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICEa(prob, symbol) \ + ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + +#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + +static uint32_t LitEnc_GetPrice(const int *probs, uint32_t symbol, uint32_t *ProbPrices) +{ + uint32_t price = 0; + symbol |= 0x100; + do + { + price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); + symbol <<= 1; + } + while (symbol < 0x10000); + return price; +} + +static uint32_t LitEnc_GetPriceMatched(const int *probs, uint32_t symbol, uint32_t matchByte, uint32_t *ProbPrices) +{ + uint32_t price = 0; + uint32_t offs = 0x100; + symbol |= 0x100; + do + { + matchByte <<= 1; + price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1); + symbol <<= 1; + offs &= ~(matchByte ^ symbol); + } + while (symbol < 0x10000); + return price; +} + + +static void RcTree_Encode(CRangeEnc *rc, int *probs, int numBitLevels, uint32_t symbol) +{ + uint32_t m = 1; + int i; + for (i = numBitLevels; i != 0;) + { + uint32_t bit; + i--; + bit = (symbol >> i) & 1; + RangeEnc_EncodeBit(rc, probs + m, bit); + m = (m << 1) | bit; + } +} + +static void RcTree_ReverseEncode(CRangeEnc *rc, int *probs, int numBitLevels, uint32_t symbol) +{ + uint32_t m = 1; + int i; + for (i = 0; i < numBitLevels; i++) + { + uint32_t bit = symbol & 1; + RangeEnc_EncodeBit(rc, probs + m, bit); + m = (m << 1) | bit; + symbol >>= 1; + } +} + +static uint32_t RcTree_GetPrice(const int *probs, int numBitLevels, uint32_t symbol, uint32_t *ProbPrices) +{ + uint32_t price = 0; + symbol |= (1 << numBitLevels); + while (symbol != 1) + { + price += GET_PRICEa(probs[symbol >> 1], symbol & 1); + symbol >>= 1; + } + return price; +} + +static uint32_t RcTree_ReverseGetPrice(const int *probs, int numBitLevels, uint32_t symbol, uint32_t *ProbPrices) +{ + uint32_t price = 0; + uint32_t m = 1; + int i; + for (i = numBitLevels; i != 0; i--) + { + uint32_t bit = symbol & 1; + symbol >>= 1; + price += GET_PRICEa(probs[m], bit); + m = (m << 1) | bit; + } + return price; +} + + +static void LenEnc_Init(CLenEnc *p) +{ + unsigned i; + p->choice = p->choice2 = kProbInitValue; + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) + p->low[i] = kProbInitValue; + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) + p->mid[i] = kProbInitValue; + for (i = 0; i < kLenNumHighSymbols; i++) + p->high[i] = kProbInitValue; +} + +static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, uint32_t symbol, uint32_t posState) +{ + if (symbol < kLenNumLowSymbols) + { + RangeEnc_EncodeBit(rc, &p->choice, 0); + RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); + } + else + { + RangeEnc_EncodeBit(rc, &p->choice, 1); + if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) + { + RangeEnc_EncodeBit(rc, &p->choice2, 0); + RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); + } + else + { + RangeEnc_EncodeBit(rc, &p->choice2, 1); + RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); + } + } +} + +static void LenEnc_SetPrices(CLenEnc *p, uint32_t posState, uint32_t numSymbols, uint32_t *prices, uint32_t *ProbPrices) +{ + uint32_t a0 = GET_PRICE_0a(p->choice); + uint32_t a1 = GET_PRICE_1a(p->choice); + uint32_t b0 = a1 + GET_PRICE_0a(p->choice2); + uint32_t b1 = a1 + GET_PRICE_1a(p->choice2); + uint32_t i = 0; + for (i = 0; i < kLenNumLowSymbols; i++) + { + if (i >= numSymbols) + return; + prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); + } + for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) + { + if (i >= numSymbols) + return; + prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); + } + for (; i < numSymbols; i++) + prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); +} + +static void LenPriceEnc_UpdateTable(CLenPriceEnc *p, uint32_t posState, uint32_t *ProbPrices) +{ + LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); + p->counters[posState] = p->tableSize; +} + +static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, uint32_t numPosStates, uint32_t *ProbPrices) +{ + uint32_t posState; + for (posState = 0; posState < numPosStates; posState++) + LenPriceEnc_UpdateTable(p, posState, ProbPrices); +} + +static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, uint32_t symbol, uint32_t posState, bool updatePrice, uint32_t *ProbPrices) +{ + LenEnc_Encode(&p->p, rc, symbol, posState); + if (updatePrice) + if (--p->counters[posState] == 0) + LenPriceEnc_UpdateTable(p, posState, ProbPrices); +} + + +static void MovePos(CLzmaEnc *p, uint32_t num) +{ + #ifdef SHOW_STAT + ttt += num; + printf("\n MovePos %d", num); + #endif + if (num != 0) + { + p->additionalOffset += num; + p->matchFinder.Skip(&p->matchFinderBase, num); + } +} + +static uint32_t ReadMatchDistances(CLzmaEnc *p, uint32_t *numDistancePairsRes) +{ + uint32_t lenRes = 0, numPairs; + p->numAvail = Mf_GetNumAvailableBytes(&p->matchFinderBase); + numPairs = p->matchFinder.GetMatches(&p->matchFinderBase, p->matches); + #ifdef SHOW_STAT + printf("\n i = %d numPairs = %d ", ttt, numPairs / 2); + ttt++; + { + uint32_t i; + for (i = 0; i < numPairs; i += 2) + printf("%2d %6d | ", p->matches[i], p->matches[i + 1]); + } + #endif + if (numPairs > 0) + { + lenRes = p->matches[numPairs - 2]; + if (lenRes == p->numFastBytes) + { + const uint8_t *pby = Mf_GetPointerToCurrentPos(&p->matchFinderBase) - 1; + uint32_t distance = p->matches[numPairs - 1] + 1; + uint32_t numAvail = p->numAvail; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + { + const uint8_t *pby2 = pby - distance; + for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++) ; + } + } + } + p->additionalOffset++; + *numDistancePairsRes = numPairs; + return lenRes; +} + + +#define MakeAsChar(p) (p)->backPrev = (uint32_t)(-1); (p)->prev1IsChar = false; +#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = false; +#define IsShortRep(p) ((p)->backPrev == 0) + +static uint32_t GetRepLen1Price(CLzmaEnc *p, State state, uint32_t posState) +{ + return + GET_PRICE_0(p->isRepG0[state]) + + GET_PRICE_0(p->isRep0Long[state][posState]); +} + +static uint32_t GetPureRepPrice(CLzmaEnc *p, uint32_t repIndex, State state, uint32_t posState) +{ + uint32_t price; + if (repIndex == 0) + { + price = GET_PRICE_0(p->isRepG0[state]); + price += GET_PRICE_1(p->isRep0Long[state][posState]); + } + else + { + price = GET_PRICE_1(p->isRepG0[state]); + if (repIndex == 1) + price += GET_PRICE_0(p->isRepG1[state]); + else + { + price += GET_PRICE_1(p->isRepG1[state]); + price += GET_PRICE(p->isRepG2[state], repIndex - 2); + } + } + return price; +} + +static uint32_t GetRepPrice(CLzmaEnc *p, uint32_t repIndex, uint32_t len, State state, uint32_t posState) +{ + return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] + + GetPureRepPrice(p, repIndex, state, posState); +} + +static uint32_t Backward(CLzmaEnc *p, uint32_t *backRes, uint32_t cur) +{ + uint32_t posMem = p->opt[cur].posPrev; + uint32_t backMem = p->opt[cur].backPrev; + p->optimumEndIndex = cur; + do + { + if (p->opt[cur].prev1IsChar) + { + MakeAsChar(&p->opt[posMem]) + p->opt[posMem].posPrev = posMem - 1; + if (p->opt[cur].prev2) + { + p->opt[posMem - 1].prev1IsChar = false; + p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2; + p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2; + } + } + { + uint32_t posPrev = posMem; + uint32_t backCur = backMem; + + backMem = p->opt[posPrev].backPrev; + posMem = p->opt[posPrev].posPrev; + + p->opt[posPrev].backPrev = backCur; + p->opt[posPrev].posPrev = cur; + cur = posPrev; + } + } + while (cur != 0); + *backRes = p->opt[0].backPrev; + p->optimumCurrentIndex = p->opt[0].posPrev; + return p->optimumCurrentIndex; +} + +#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300) + +static uint32_t GetOptimum(CLzmaEnc *p, uint32_t position, uint32_t *backRes) +{ + uint32_t numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur; + uint32_t matchPrice, repMatchPrice, normalMatchPrice; + uint32_t reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS]; + uint32_t *matches; + const uint8_t *data; + uint8_t curByte, matchByte; + if (p->optimumEndIndex != p->optimumCurrentIndex) + { + const COptimal *opt = &p->opt[p->optimumCurrentIndex]; + uint32_t lenRes = opt->posPrev - p->optimumCurrentIndex; + *backRes = opt->backPrev; + p->optimumCurrentIndex = opt->posPrev; + return lenRes; + } + p->optimumCurrentIndex = p->optimumEndIndex = 0; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { + mainLen = p->longestMatchLength; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + if (numAvail < 2) + { + *backRes = (uint32_t)(-1); + return 1; + } + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + + data = Mf_GetPointerToCurrentPos(&p->matchFinderBase) - 1; + repMaxIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + { + uint32_t lenTest; + const uint8_t *data2; + reps[i] = p->reps[i]; + data2 = data - (reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + { + repLens[i] = 0; + continue; + } + for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) ; + repLens[i] = lenTest; + if (lenTest > repLens[repMaxIndex]) + repMaxIndex = i; + } + if (repLens[repMaxIndex] >= p->numFastBytes) + { + uint32_t lenRes; + *backRes = repMaxIndex; + lenRes = repLens[repMaxIndex]; + MovePos(p, lenRes - 1); + return lenRes; + } + + matches = p->matches; + if (mainLen >= p->numFastBytes) + { + *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; + MovePos(p, mainLen - 1); + return mainLen; + } + curByte = *data; + matchByte = *(data - (reps[0] + 1)); + + if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) + { + *backRes = (uint32_t)-1; + return 1; + } + + p->opt[0].state = p->state; + + posState = (position & p->pbMask); + + { + const int *probs = LIT_PROBS(position, *(data - 1)); + p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + + (!IsCharState(p->state) ? + LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + MakeAsChar(&p->opt[1]); + + matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); + + if (matchByte == curByte) + { + uint32_t shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); + if (shortRepPrice < p->opt[1].price) + { + p->opt[1].price = shortRepPrice; + MakeAsShortRep(&p->opt[1]); + } + } + lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]); + + if (lenEnd < 2) + { + *backRes = p->opt[1].backPrev; + return 1; + } + + p->opt[1].posPrev = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + p->opt[0].backs[i] = reps[i]; + + len = lenEnd; + do + p->opt[len--].price = kInfinityPrice; + while (len >= 2); + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + uint32_t repLen = repLens[i]; + uint32_t price; + if (repLen < 2) + continue; + price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); + do + { + uint32_t curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2]; + COptimal *opt = &p->opt[repLen]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = 0; + opt->backPrev = i; + opt->prev1IsChar = false; + } + } + while (--repLen >= 2); + } + + normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); + + len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2); + if (len <= mainLen) + { + uint32_t offs = 0; + while (len > matches[offs]) + offs += 2; + for (; ; len++) + { + COptimal *opt; + uint32_t distance = matches[offs + 1]; + + uint32_t curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN]; + uint32_t lenToPosState = GetLenToPosState(len); + if (distance < kNumFullDistances) + curAndLenPrice += p->distancesPrices[lenToPosState][distance]; + else + { + uint32_t slot; + GetPosSlot2(distance, slot); + curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot]; + } + opt = &p->opt[len]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = 0; + opt->backPrev = distance + LZMA_NUM_REPS; + opt->prev1IsChar = false; + } + if (len == matches[offs]) + { + offs += 2; + if (offs == numPairs) + break; + } + } + } + + cur = 0; + + #ifdef SHOW_STAT2 + if (position >= 0) + { + unsigned i; + printf("\n pos = %4X", position); + for (i = cur; i <= lenEnd; i++) + printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price); + } + #endif + + for (;;) + { + uint32_t numAvailFull, newLen, numPairs, posPrev, state, posState, startLen; + uint32_t curPrice, curAnd1Price, matchPrice, repMatchPrice; + bool nextIsChar; + uint8_t curByte, matchByte; + const uint8_t *data; + COptimal *curOpt; + COptimal *nextOpt; + + cur++; + if (cur == lenEnd) + return Backward(p, backRes, cur); + + newLen = ReadMatchDistances(p, &numPairs); + if (newLen >= p->numFastBytes) + { + p->numPairs = numPairs; + p->longestMatchLength = newLen; + return Backward(p, backRes, cur); + } + position++; + curOpt = &p->opt[cur]; + posPrev = curOpt->posPrev; + if (curOpt->prev1IsChar) + { + posPrev--; + if (curOpt->prev2) + { + state = p->opt[curOpt->posPrev2].state; + if (curOpt->backPrev2 < LZMA_NUM_REPS) + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } + else + state = p->opt[posPrev].state; + state = kLiteralNextStates[state]; + } + else + state = p->opt[posPrev].state; + if (posPrev == cur - 1) + { + if (IsShortRep(curOpt)) + state = kShortRepNextStates[state]; + else + state = kLiteralNextStates[state]; + } + else + { + uint32_t pos; + const COptimal *prevOpt; + if (curOpt->prev1IsChar && curOpt->prev2) + { + posPrev = curOpt->posPrev2; + pos = curOpt->backPrev2; + state = kRepNextStates[state]; + } + else + { + pos = curOpt->backPrev; + if (pos < LZMA_NUM_REPS) + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } + prevOpt = &p->opt[posPrev]; + if (pos < LZMA_NUM_REPS) + { + uint32_t i; + reps[0] = prevOpt->backs[pos]; + for (i = 1; i <= pos; i++) + reps[i] = prevOpt->backs[i - 1]; + for (; i < LZMA_NUM_REPS; i++) + reps[i] = prevOpt->backs[i]; + } + else + { + uint32_t i; + reps[0] = (pos - LZMA_NUM_REPS); + for (i = 1; i < LZMA_NUM_REPS; i++) + reps[i] = prevOpt->backs[i - 1]; + } + } + curOpt->state = state; + + curOpt->backs[0] = reps[0]; + curOpt->backs[1] = reps[1]; + curOpt->backs[2] = reps[2]; + curOpt->backs[3] = reps[3]; + + curPrice = curOpt->price; + nextIsChar = false; + data = Mf_GetPointerToCurrentPos(&p->matchFinderBase) - 1; + curByte = *data; + matchByte = *(data - (reps[0] + 1)); + + posState = (position & p->pbMask); + + curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); + { + const int *probs = LIT_PROBS(position, *(data - 1)); + curAnd1Price += + (!IsCharState(state) ? + LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + nextOpt = &p->opt[cur + 1]; + + if (curAnd1Price < nextOpt->price) + { + nextOpt->price = curAnd1Price; + nextOpt->posPrev = cur; + MakeAsChar(nextOpt); + nextIsChar = true; + } + + matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); + + if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) + { + uint32_t shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); + if (shortRepPrice <= nextOpt->price) + { + nextOpt->price = shortRepPrice; + nextOpt->posPrev = cur; + MakeAsShortRep(nextOpt); + nextIsChar = true; + } + } + numAvailFull = p->numAvail; + { + uint32_t temp = kNumOpts - 1 - cur; + if (temp < numAvailFull) + numAvailFull = temp; + } + + if (numAvailFull < 2) + continue; + numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); + + if (!nextIsChar && matchByte != curByte) /* speed optimization */ + { + /* try Literal + rep0 */ + uint32_t temp; + uint32_t lenTest2; + const uint8_t *data2 = data - (reps[0] + 1); + uint32_t limit = p->numFastBytes + 1; + if (limit > numAvailFull) + limit = numAvailFull; + + for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++) ; + lenTest2 = temp - 1; + if (lenTest2 >= 2) + { + State state2 = kLiteralNextStates[state]; + uint32_t posStateNext = (position + 1) & p->pbMask; + uint32_t nextRepMatchPrice = curAnd1Price + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + /* for (; lenTest2 >= 2; lenTest2--) */ + { + uint32_t curAndLenPrice; + COptimal *opt; + uint32_t offset = cur + 1 + lenTest2; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur + 1; + opt->backPrev = 0; + opt->prev1IsChar = true; + opt->prev2 = false; + } + } + } + } + + startLen = 2; /* speed optimization */ + { + uint32_t repIndex; + for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) + { + uint32_t lenTest; + uint32_t lenTestTemp; + uint32_t price; + const uint8_t *data2 = data - (reps[repIndex] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) ; + while (lenEnd < cur + lenTest) + p->opt[++lenEnd].price = kInfinityPrice; + lenTestTemp = lenTest; + price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); + do + { + uint32_t curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2]; + COptimal *opt = &p->opt[cur + lenTest]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur; + opt->backPrev = repIndex; + opt->prev1IsChar = false; + } + } + while (--lenTest >= 2); + lenTest = lenTestTemp; + + if (repIndex == 0) + startLen = lenTest + 1; + + /* if (_maxMode) */ + { + uint32_t lenTest2 = lenTest + 1; + uint32_t limit = lenTest2 + p->numFastBytes; + uint32_t nextRepMatchPrice; + if (limit > numAvailFull) + limit = numAvailFull; + for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) ; + lenTest2 -= lenTest + 1; + if (lenTest2 >= 2) + { + State state2 = kRepNextStates[state]; + uint32_t posStateNext = (position + lenTest) & p->pbMask; + uint32_t curAndLenCharPrice = + price + p->repLenEnc.prices[posState][lenTest - 2] + + GET_PRICE_0(p->isMatch[state2][posStateNext]) + + LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), + data[lenTest], data2[lenTest], p->ProbPrices); + state2 = kLiteralNextStates[state2]; + posStateNext = (position + lenTest + 1) & p->pbMask; + nextRepMatchPrice = curAndLenCharPrice + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + + /* for (; lenTest2 >= 2; lenTest2--) */ + { + uint32_t curAndLenPrice; + COptimal *opt; + uint32_t offset = cur + lenTest + 1 + lenTest2; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur + lenTest + 1; + opt->backPrev = 0; + opt->prev1IsChar = true; + opt->prev2 = true; + opt->posPrev2 = cur; + opt->backPrev2 = repIndex; + } + } + } + } + } + } + /* for (uint32_t lenTest = 2; lenTest <= newLen; lenTest++) */ + if (newLen > numAvail) + { + newLen = numAvail; + for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2) ; + matches[numPairs] = newLen; + numPairs += 2; + } + if (newLen >= startLen) + { + uint32_t normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); + uint32_t offs, curBack, posSlot; + uint32_t lenTest; + while (lenEnd < cur + newLen) + p->opt[++lenEnd].price = kInfinityPrice; + + offs = 0; + while (startLen > matches[offs]) + offs += 2; + curBack = matches[offs + 1]; + GetPosSlot2(curBack, posSlot); + for (lenTest = /*2*/ startLen; ; lenTest++) + { + uint32_t curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN]; + uint32_t lenToPosState = GetLenToPosState(lenTest); + COptimal *opt; + if (curBack < kNumFullDistances) + curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; + else + curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask]; + + opt = &p->opt[cur + lenTest]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur; + opt->backPrev = curBack + LZMA_NUM_REPS; + opt->prev1IsChar = false; + } + + if (/*_maxMode && */lenTest == matches[offs]) + { + /* Try Match + Literal + Rep0 */ + const uint8_t *data2 = data - (curBack + 1); + uint32_t lenTest2 = lenTest + 1; + uint32_t limit = lenTest2 + p->numFastBytes; + uint32_t nextRepMatchPrice; + if (limit > numAvailFull) + limit = numAvailFull; + for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) ; + lenTest2 -= lenTest + 1; + if (lenTest2 >= 2) + { + State state2 = kMatchNextStates[state]; + uint32_t posStateNext = (position + lenTest) & p->pbMask; + uint32_t curAndLenCharPrice = curAndLenPrice + + GET_PRICE_0(p->isMatch[state2][posStateNext]) + + LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), + data[lenTest], data2[lenTest], p->ProbPrices); + state2 = kLiteralNextStates[state2]; + posStateNext = (posStateNext + 1) & p->pbMask; + nextRepMatchPrice = curAndLenCharPrice + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + + /* for (; lenTest2 >= 2; lenTest2--) */ + { + uint32_t offset = cur + lenTest + 1 + lenTest2; + uint32_t curAndLenPrice; + COptimal *opt; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur + lenTest + 1; + opt->backPrev = 0; + opt->prev1IsChar = true; + opt->prev2 = true; + opt->posPrev2 = cur; + opt->backPrev2 = curBack + LZMA_NUM_REPS; + } + } + } + offs += 2; + if (offs == numPairs) + break; + curBack = matches[offs + 1]; + if (curBack >= kNumFullDistances) + GetPosSlot2(curBack, posSlot); + } + } + } + } +} + +#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) + +static uint32_t GetOptimumFast(CLzmaEnc *p, uint32_t *backRes) +{ + uint32_t numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i; + const uint8_t *data; + const uint32_t *matches; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { + mainLen = p->longestMatchLength; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + *backRes = (uint32_t)-1; + if (numAvail < 2) + return 1; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + data = Mf_GetPointerToCurrentPos(&p->matchFinderBase) - 1; + + repLen = repIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + { + uint32_t len; + const uint8_t *data2 = data - (p->reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + for (len = 2; len < numAvail && data[len] == data2[len]; len++) ; + if (len >= p->numFastBytes) + { + *backRes = i; + MovePos(p, len - 1); + return len; + } + if (len > repLen) + { + repIndex = i; + repLen = len; + } + } + + matches = p->matches; + if (mainLen >= p->numFastBytes) + { + *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; + MovePos(p, mainLen - 1); + return mainLen; + } + + mainDist = 0; /* for GCC */ + if (mainLen >= 2) + { + mainDist = matches[numPairs - 1]; + while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1) + { + if (!ChangePair(matches[numPairs - 3], mainDist)) + break; + numPairs -= 2; + mainLen = matches[numPairs - 2]; + mainDist = matches[numPairs - 1]; + } + if (mainLen == 2 && mainDist >= 0x80) + mainLen = 1; + } + + if (repLen >= 2 && ( + (repLen + 1 >= mainLen) || + (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || + (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) + { + *backRes = repIndex; + MovePos(p, repLen - 1); + return repLen; + } + + if (mainLen < 2 || numAvail <= 2) + return 1; + + p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); + if (p->longestMatchLength >= 2) + { + uint32_t newDistance = matches[p->numPairs - 1]; + if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || + (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) || + (p->longestMatchLength > mainLen + 1) || + (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist))) + return 1; + } + + data = Mf_GetPointerToCurrentPos(&p->matchFinderBase) - 1; + for (i = 0; i < LZMA_NUM_REPS; i++) + { + uint32_t len, limit; + const uint8_t *data2 = data - (p->reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + limit = mainLen - 1; + for (len = 2; len < limit && data[len] == data2[len]; len++) ; + if (len >= limit) + return 1; + } + *backRes = mainDist + LZMA_NUM_REPS; + MovePos(p, mainLen - 2); + return mainLen; +} + +static void LZe_full_flush(CLzmaEnc *p, uint32_t posState) + { + const uint32_t len = LZMA_MATCH_LEN_MIN; + File_trailer trailer; + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + p->state = kMatchNextStates[p->state]; + LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); + RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); + RangeEnc_EncodeDirectBits(&p->rc, (((uint32_t)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); + RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); + RangeEnc_FlushData(&p->rc); + RangeEnc_FlushStream(&p->rc); + Ft_set_data_crc( trailer, p->matchFinderBase.crc ^ 0xFFFFFFFFU ); + Ft_set_data_size( trailer, p->nowPos64 ); + Ft_set_member_size( trailer, p->rc.processed + Fh_size + Ft_size ); + if( writeblock( p->rc.outfd, trailer, Ft_size ) != Ft_size ) + p->rc.res = SZ_ERROR_WRITE; + if( verbosity >= 1 ) + { + unsigned long long in_size = p->nowPos64; + unsigned long long out_size = p->rc.processed + Fh_size + Ft_size; + if( in_size <= 0 || out_size <= 0 ) + fprintf( stderr, " no data compressed.\n" ); + else + fprintf( stderr, "%6.3f:1, %6.3f bits/byte, " + "%5.2f%% saved, %llu in, %llu out.\n", + (double)in_size / out_size, + ( 8.0 * out_size ) / in_size, + 100.0 * ( 1.0 - ( (double)out_size / in_size ) ), + in_size, out_size ); + } + } + +static int CheckErrors(CLzmaEnc *p) +{ + if (p->result != SZ_OK) + return p->result; + if (p->rc.res != SZ_OK) + p->result = SZ_ERROR_WRITE; + if (p->matchFinderBase.result != SZ_OK) + p->result = SZ_ERROR_READ; + if (p->result != SZ_OK) + p->finished = true; + return p->result; +} + +static int Flush(CLzmaEnc *p, uint32_t nowPos) +{ + /* ReleaseMFStream(); */ + p->finished = true; + LZe_full_flush(p, nowPos & p->pbMask); + return CheckErrors(p); +} + +static void FillAlignPrices(CLzmaEnc *p) +{ + uint32_t i; + for (i = 0; i < kAlignTableSize; i++) + p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + p->alignPriceCount = 0; +} + +static void FillDistancesPrices(CLzmaEnc *p) +{ + uint32_t tempPrices[kNumFullDistances]; + uint32_t i, lenToPosState; + for (i = kStartPosModelIndex; i < kNumFullDistances; i++) + { + uint32_t posSlot = GetPosSlot1(i); + uint32_t footerBits = ((posSlot >> 1) - 1); + uint32_t base = ((2 | (posSlot & 1)) << footerBits); + tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices); + } + + for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) + { + uint32_t posSlot; + const int *encoder = p->posSlotEncoder[lenToPosState]; + uint32_t *posSlotPrices = p->posSlotPrices[lenToPosState]; + for (posSlot = 0; posSlot < p->distTableSize; posSlot++) + posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); + for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) + posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + + { + uint32_t *distancesPrices = p->distancesPrices[lenToPosState]; + uint32_t i; + for (i = 0; i < kStartPosModelIndex; i++) + distancesPrices[i] = posSlotPrices[i]; + for (; i < kNumFullDistances; i++) + distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; + } + } + p->matchPriceCount = 0; +} + + +static int LzmaEnc_CodeOneBlock(CLzmaEnc *p) +{ + uint32_t nowPos32, startPos32; + + if (p->finished) + return p->result; + if( CheckErrors(p) != 0 ) return p->result; + + nowPos32 = (uint32_t)p->nowPos64; + startPos32 = nowPos32; + + if (p->nowPos64 == 0) + { + uint32_t numPairs; + uint8_t curByte; + if (Mf_GetNumAvailableBytes(&p->matchFinderBase) == 0) + return Flush(p, nowPos32); + ReadMatchDistances(p, &numPairs); + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); + p->state = kLiteralNextStates[p->state]; + curByte = Mf_GetIndexByte(&p->matchFinderBase, 0 - p->additionalOffset); + LitEnc_Encode(&p->rc, p->litProbs, curByte); + p->additionalOffset--; + nowPos32++; + } + + if (Mf_GetNumAvailableBytes(&p->matchFinderBase) != 0) + for (;;) + { + uint32_t pos, len, posState; + + if (p->fastMode) + len = GetOptimumFast(p, &pos); + else + len = GetOptimum(p, nowPos32, &pos); + + #ifdef SHOW_STAT2 + printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos); + #endif + + posState = nowPos32 & p->pbMask; + if (len == 1 && pos == (uint32_t)-1) + { + uint8_t curByte; + int *probs; + const uint8_t *data; + + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); + data = Mf_GetPointerToCurrentPos(&p->matchFinderBase) - p->additionalOffset; + curByte = *data; + probs = LIT_PROBS(nowPos32, *(data - 1)); + if (IsCharState(p->state)) + LitEnc_Encode(&p->rc, probs, curByte); + else + LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1)); + p->state = kLiteralNextStates[p->state]; + } + else + { + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); + if (pos < LZMA_NUM_REPS) + { + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); + if (pos == 0) + { + RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); + RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1)); + } + else + { + uint32_t distance = p->reps[pos]; + RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); + if (pos == 1) + RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); + else + { + RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); + RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); + if (pos == 3) + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + } + p->reps[1] = p->reps[0]; + p->reps[0] = distance; + } + if (len == 1) + p->state = kShortRepNextStates[p->state]; + else + { + LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); + p->state = kRepNextStates[p->state]; + } + } + else + { + uint32_t posSlot; + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + p->state = kMatchNextStates[p->state]; + LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); + pos -= LZMA_NUM_REPS; + GetPosSlot(pos, posSlot); + RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot); + + if (posSlot >= kStartPosModelIndex) + { + uint32_t footerBits = ((posSlot >> 1) - 1); + uint32_t base = ((2 | (posSlot & 1)) << footerBits); + uint32_t posReduced = pos - base; + + if (posSlot < kEndPosModelIndex) + RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced); + else + { + RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); + RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); + p->alignPriceCount++; + } + } + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + p->reps[1] = p->reps[0]; + p->reps[0] = pos; + p->matchPriceCount++; + } + } + p->additionalOffset -= len; + nowPos32 += len; + if (p->additionalOffset == 0) + { + uint32_t processed; + if (!p->fastMode) + { + if (p->matchPriceCount >= (1 << 7)) + FillDistancesPrices(p); + if (p->alignPriceCount >= kAlignTableSize) + FillAlignPrices(p); + } + if (Mf_GetNumAvailableBytes(&p->matchFinderBase) == 0) + break; + processed = nowPos32 - startPos32; + if (processed >= (1 << 15)) + { + p->nowPos64 += nowPos32 - startPos32; + return CheckErrors(p); + } + } + } + p->nowPos64 += nowPos32 - startPos32; + return Flush(p, nowPos32); +} + + +CLzmaEncHandle LzmaEnc_Init( const int dict_size, const int match_len_limit, + const int infd, const int outfd ) + { + int i; + const uint32_t beforeSize = kNumOpts; + CLzmaEnc * const p = (CLzmaEnc *)malloc(sizeof(CLzmaEnc)); + if( !p ) return 0; + + p->nowPos64 = 0; + p->dictSize = dict_size; + p->numFastBytes = match_len_limit; + p->lc = literal_context_bits; + p->lp = 0; + p->pb = pos_state_bits; + p->optimumEndIndex = 0; + p->optimumCurrentIndex = 0; + p->additionalOffset = 0; + p->state = 0; + p->result = SZ_OK; + p->fastMode = false; + p->finished = false; + + if (!Mf_Init(&p->matchFinderBase, infd, 16 + ( match_len_limit / 2 ), p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX)) + { free( p ); return 0; } + Mf_CreateVTable(&p->matchFinderBase, &p->matchFinder); + + LzmaEnc_FastPosInit(p->g_FastPos); + LzmaEnc_InitPriceTables(p->ProbPrices); + for (i = 0; i < kDicLogSizeMaxCompress; i++) + if (p->dictSize <= ((uint32_t)1 << i)) + break; + p->distTableSize = i * 2; + if( !RangeEnc_Init( &p->rc, outfd ) ) { free( p ); return 0; } + p->litProbs = (int *)malloc((0x300 << (p->lc + p->lp)) * sizeof(int)); + if( !p->litProbs ) { free( p ); return 0; } + + for (i = 0 ; i < LZMA_NUM_REPS; i++) + p->reps[i] = 0; + for (i = 0; i < kNumStates; i++) + { + int j; + for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) + { + p->isMatch[i][j] = kProbInitValue; + p->isRep0Long[i][j] = kProbInitValue; + } + p->isRep[i] = kProbInitValue; + p->isRepG0[i] = kProbInitValue; + p->isRepG1[i] = kProbInitValue; + p->isRepG2[i] = kProbInitValue; + } + { + const int num = 0x300 << (p->lp + p->lc); + for (i = 0; i < num; i++) + p->litProbs[i] = kProbInitValue; + } + for (i = 0; i < kNumLenToPosStates; i++) + { + int *probs = p->posSlotEncoder[i]; + uint32_t j; + for (j = 0; j < (1 << kNumPosSlotBits); j++) + probs[j] = kProbInitValue; + } + for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) + p->posEncoders[i] = kProbInitValue; + LenEnc_Init(&p->lenEnc.p); + LenEnc_Init(&p->repLenEnc.p); + for (i = 0; i < (1 << kNumAlignBits); i++) + p->posAlignEncoder[i] = kProbInitValue; + p->pbMask = (1 << p->pb) - 1; + p->lpMask = (1 << p->lp) - 1; + + if (!p->fastMode) { FillDistancesPrices(p); FillAlignPrices(p); } + p->lenEnc.tableSize = + p->repLenEnc.tableSize = + p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; + LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices); + return p; + } + + +void LzmaEnc_Free(CLzmaEncHandle pp) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + Mf_Free(&p->matchFinderBase); + free(p->litProbs); + p->litProbs = 0; + RangeEnc_Free(&p->rc); + free(p); +} + + +int LzmaEnc_Encode(CLzmaEncHandle pp) +{ + int res = SZ_OK; + CLzmaEnc *p = (CLzmaEnc *)pp; + + for (;;) + { + res = LzmaEnc_CodeOneBlock(p); + if( res != SZ_OK || p->finished ) + break; + } + return res; +} diff --git a/LzmaEnc.h b/LzmaEnc.h index 529a8f4..dbc977d 100644 --- a/LzmaEnc.h +++ b/LzmaEnc.h @@ -1,59 +1,18 @@ -/* LzmaEnc.h -- LZMA Encoder -2009-02-07 : Igor Pavlov : Public domain */ - -#ifndef __LZMA_ENC_H -#define __LZMA_ENC_H - -#include "Types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct _CLzmaEncProps -{ - int level; /* 0 <= level <= 9 */ - UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version - (1 << 12) <= dictSize <= (1 << 30) for 64-bit version - default = (1 << 24) */ - int lc; /* 0 <= lc <= 8, default = 3 */ - int lp; /* 0 <= lp <= 4, default = 0 */ - int pb; /* 0 <= pb <= 4, default = 2 */ - int algo; /* 0 - fast, 1 - normal, default = 1 */ - int fb; /* 5 <= fb <= 273, default = 32 */ - int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ - int numHashBytes; /* 2, 3 or 4, default = 4 */ - UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ - int numThreads; /* 1 or 2, default = 2 */ -} CLzmaEncProps; - -void LzmaEncProps_Init(CLzmaEncProps *p); -void LzmaEncProps_Normalize(CLzmaEncProps *p); -UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); - - -/* ---------- CLzmaEncHandle Interface ---------- */ - -/* LzmaEnc_* functions can return the following exit codes: -Returns: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater in props - SZ_ERROR_WRITE - Write callback error. - SZ_ERROR_PROGRESS - some break from progress callback - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) -*/ - -typedef void * CLzmaEncHandle; - -CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc); -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig); -void LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); -SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, - ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); - -#ifdef __cplusplus -} -#endif - -#endif +/* LzmaEnc.h -- LZMA Encoder +2009-02-07 : Igor Pavlov : Public domain */ + + +/* ---------- CLzmaEncHandle Interface ---------- */ + +/* LzmaEnc_* functions can return the following exit codes: +Returns: + SZ_OK - OK + SZ_ERROR_WRITE - Write callback error. +*/ + +typedef void * CLzmaEncHandle; + +CLzmaEncHandle LzmaEnc_Init( const int dict_size, const int match_len_limit, + const int infd, const int outfd ); +void LzmaEnc_Free(CLzmaEncHandle p); +int LzmaEnc_Encode(CLzmaEncHandle p); diff --git a/Makefile.in b/Makefile.in index c20e0f2..03ceaee 100644 --- a/Makefile.in +++ b/Makefile.in @@ -6,21 +6,20 @@ INSTALL_DATA = $(INSTALL) -p -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh -objs = 7zFile.o 7zStream.o Alloc.o LzFind.o LzmaDec.o LzmaEnc.o \ - pdarg_parser.o main.o +objs = carg_parser.o LzFind.o LzmaEnc.o LzmaDec.o main.o -.PHONY : all install install-info install-man install-strip \ - uninstall uninstall-info uninstall-man \ +.PHONY : all install install-bin install-info install-man install-strip \ + install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \ doc info man check dist clean distclean all : $(progname) $(progname) : $(objs) - $(CC) $(LDFLAGS) -o $@ $^ + $(CC) $(LDFLAGS) -o $@ $(objs) $(progname)_profiled : $(objs) - $(CC) $(LDFLAGS) -pg -o $@ $^ + $(CC) $(LDFLAGS) -pg -o $@ $(objs) main.o : main.c $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< @@ -28,15 +27,12 @@ main.o : main.c %.o : %.c $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< -$(objs) : Makefile -7zFile.o : 7zFile.h Types.h -7zStream.o : Types.h -Alloc.o : Alloc.h -LzFind.o : LzFind.h LzHash.h Types.h pdlzip.h -LzmaDec.o : LzmaDec.h Types.h -LzmaEnc.o : LzFind.h LzmaEnc.h Types.h pdlzip.h -pdarg_parser.o : pdarg_parser.h -main.o : 7zFile.h Alloc.h LzmaDec.h LzmaEnc.h pdarg_parser.h pdlzip.h +$(objs) : Makefile +carg_parser.o : carg_parser.h +LzmaDec.o : clzip.h LzmaDec.h +LzFind.o : clzip.h LzFind.h +LzmaEnc.o : clzip.h LzFind.h LzmaEnc.h +main.o : carg_parser.h clzip.h LzmaDec.h LzmaEnc.h doc : man @@ -58,14 +54,16 @@ Makefile : $(VPATH)/configure $(VPATH)/Makefile.in check : all @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) -install : all install-man +install : install-bin install-man + +install-bin : all if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)" install-info : if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" - -install-info --info-dir="$(DESTDIR)$(infodir)" $(DESTDIR)$(infodir)/$(pkgname).info + -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" install-man : if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi @@ -74,7 +72,13 @@ install-man : install-strip : all $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install -uninstall : uninstall-man +install-as-lzip : install + -rm -f "$(DESTDIR)$(bindir)/lzip" + cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip + +uninstall : uninstall-bin uninstall-man + +uninstall-bin : -rm -f "$(DESTDIR)$(bindir)/$(progname)" uninstall-info : @@ -97,8 +101,8 @@ dist : doc $(DISTNAME)/doc/$(progname).1 \ $(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/test.txt \ - $(DISTNAME)/testsuite/test.lz \ - $(DISTNAME)/testsuite/test.lzma \ + $(DISTNAME)/testsuite/test.txt.lz \ + $(DISTNAME)/testsuite/test.txt.lzma \ $(DISTNAME)/*.h \ $(DISTNAME)/*.c rm -f $(DISTNAME) diff --git a/NEWS b/NEWS index 13a6da4..fe9a4dd 100644 --- a/NEWS +++ b/NEWS @@ -1,8 +1,18 @@ -Changes in version 1.3: +Changes in version 1.4: -A small change has been made in the "--help" output and man page. +The options "-f, --force", "-F, --recompress", "-k, --keep", and +"-o, --output", have been ported from clzip. -Quote characters in messages have been changed as advised by GNU Coding -Standards. +Pdlzip now accepts more than one file in the command line. -Standard input and standard output are now set in binary mode on OS2. +Decompression time has been reduced by 5%. + +The dependence of "--test" on the existence of "/dev/null" has been +removed. + +Configure option "--datadir" has been renamed to "--datarootdir" to +follow GNU Standards. + +The target "install-as-lzip" has been added to the Makefile. + +The target "install-bin" has been added to the Makefile. diff --git a/README b/README index 7841983..80aab86 100644 --- a/README +++ b/README @@ -6,17 +6,20 @@ gzip or bzip2. Pdlzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. +Pdlzip uses the lzip file format; the files produced by pdlzip are +(hope)fully compatible with lzip-1.4 or newer. Pdlzip is in fact a +"public domain" version of the lzip data compressor, intended for those +who can't distribute GPL licensed Free Software. + Pdlzip is also able to decompress legacy lzma-alone (.lzma) files. -Pdlzip is a "public domain" version of the lzip data compressor, -intended for those who can't distribute GPL licensed Free Software. Pdlzip is written in C. -Pdlzip uses public domain compression code from the LZMA SDK written by -Igor Pavlov. +Pdlzip includes public domain compression code from the LZMA SDK written +by Igor Pavlov. -Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. +Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Types.h b/Types.h deleted file mode 100644 index 5ae7ce1..0000000 --- a/Types.h +++ /dev/null @@ -1,225 +0,0 @@ -/* Types.h -- Basic types -2009-11-23 : Igor Pavlov : Public domain */ - -#ifndef __7Z_TYPES_H -#define __7Z_TYPES_H - -#include - -#ifdef _WIN32 -#include -#endif - -#ifndef EXTERN_C_BEGIN -#ifdef __cplusplus -#define EXTERN_C_BEGIN extern "C" { -#define EXTERN_C_END } -#else -#define EXTERN_C_BEGIN -#define EXTERN_C_END -#endif -#endif - -EXTERN_C_BEGIN - -#define SZ_OK 0 - -#define SZ_ERROR_DATA 1 -#define SZ_ERROR_MEM 2 -#define SZ_ERROR_CRC 3 -#define SZ_ERROR_UNSUPPORTED 4 -#define SZ_ERROR_PARAM 5 -#define SZ_ERROR_INPUT_EOF 6 -#define SZ_ERROR_OUTPUT_EOF 7 -#define SZ_ERROR_READ 8 -#define SZ_ERROR_WRITE 9 -#define SZ_ERROR_PROGRESS 10 -#define SZ_ERROR_FAIL 11 -#define SZ_ERROR_THREAD 12 - -#define SZ_ERROR_ARCHIVE 16 -#define SZ_ERROR_NO_ARCHIVE 17 - -typedef int SRes; - -#ifdef _WIN32 -typedef DWORD WRes; -#else -typedef int WRes; -#endif - -#ifndef RINOK -#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } -#endif - -typedef unsigned char Byte; -typedef short Int16; -typedef unsigned short UInt16; - -#ifdef _LZMA_UINT32_IS_ULONG -typedef long Int32; -typedef unsigned long UInt32; -#else -typedef int Int32; -typedef unsigned int UInt32; -#endif - -#ifdef _SZ_NO_INT_64 - -/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. - NOTES: Some code will work incorrectly in that case! */ - -typedef long Int64; -typedef unsigned long UInt64; - -#else - -#if defined(_MSC_VER) || defined(__BORLANDC__) -typedef __int64 Int64; -typedef unsigned __int64 UInt64; -#else -typedef long long int Int64; -typedef unsigned long long int UInt64; -#endif - -#endif - -#ifdef _LZMA_NO_SYSTEM_SIZE_T -typedef UInt32 SizeT; -#else -typedef size_t SizeT; -#endif - -typedef int Bool; -#define True 1 -#define False 0 - - -#ifdef _WIN32 -#define MY_STD_CALL __stdcall -#else -#define MY_STD_CALL -#endif - -#ifdef _MSC_VER - -#if _MSC_VER >= 1300 -#define MY_NO_INLINE __declspec(noinline) -#else -#define MY_NO_INLINE -#endif - -#define MY_CDECL __cdecl -#define MY_FAST_CALL __fastcall - -#else - -#define MY_CDECL -#define MY_FAST_CALL - -#endif - - -/* The following interfaces use first parameter as pointer to structure */ - -typedef struct -{ - SRes (*Read)(void *p, void *buf, size_t *size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) < input(*size)) is allowed */ -} ISeqInStream; - -/* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size); -SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType); - -typedef struct -{ - size_t (*Write)(void *p, const void *buf, size_t size); - /* Returns: result - the number of actually written bytes. - (result < size) means error */ -} ISeqOutStream; - -typedef enum -{ - SZ_SEEK_SET = 0, - SZ_SEEK_CUR = 1, - SZ_SEEK_END = 2 -} ESzSeek; - -typedef struct -{ - SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ - SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); -} ISeekInStream; - -typedef struct -{ - SRes (*Look)(void *p, void **buf, size_t *size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) > input(*size)) is not allowed - (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(void *p, size_t offset); - /* offset must be <= output(*size) of Look */ - - SRes (*Read)(void *p, void *buf, size_t *size); - /* reads directly (without buffer). It's same as ISeqInStream::Read */ - SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); -} ILookInStream; - -SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size); -SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset); - -/* reads via ILookInStream::Read */ -SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType); -SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size); - -#define LookToRead_BUF_SIZE (1 << 14) - -typedef struct -{ - ILookInStream s; - ISeekInStream *realStream; - size_t pos; - size_t size; - Byte buf[LookToRead_BUF_SIZE]; -} CLookToRead; - -void LookToRead_CreateVTable(CLookToRead *p, int lookahead); -void LookToRead_Init(CLookToRead *p); - -typedef struct -{ - ISeqInStream s; - ILookInStream *realStream; -} CSecToLook; - -void SecToLook_CreateVTable(CSecToLook *p); - -typedef struct -{ - ISeqInStream s; - ILookInStream *realStream; -} CSecToRead; - -void SecToRead_CreateVTable(CSecToRead *p); - -typedef struct -{ - SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize); - /* Returns: result. (result != SZ_OK) means break. - Value (UInt64)(Int64)-1 for size means unknown value. */ -} ICompressProgress; - -typedef struct -{ - void *(*Alloc)(void *p, size_t size); - void (*Free)(void *p, void *address); /* address can be 0 */ -} ISzAlloc; - -#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) -#define IAlloc_Free(p, a) (p)->Free((p), a) - -EXTERN_C_END - -#endif diff --git a/carg_parser.c b/carg_parser.c new file mode 100644 index 0000000..30cabb3 --- /dev/null +++ b/carg_parser.c @@ -0,0 +1,277 @@ +/* Pdlzip - Data compressor based on the LZMA algorithm + Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. + + This program is free software: you have unlimited permission + to copy, distribute and modify it. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#include +#include + +#include "carg_parser.h" + + +/* assure at least a minimum size for buffer 'buf' */ +static void * ap_resize_buffer( void * buf, const int min_size ) + { + if( buf ) buf = realloc( buf, min_size ); + else buf = malloc( min_size ); + return buf; + } + + +static char push_back_record( struct Arg_parser * const ap, + const int code, const char * const argument ) + { + const int len = strlen( argument ); + struct ap_Record *p; + void * tmp = ap_resize_buffer( ap->data, + ( ap->data_size + 1 ) * sizeof (struct ap_Record) ); + if( !tmp ) return 0; + ap->data = (struct ap_Record *)tmp; + p = &(ap->data[ap->data_size]); + p->code = code; + p->argument = 0; + tmp = ap_resize_buffer( p->argument, len + 1 ); + if( !tmp ) return 0; + p->argument = (char *)tmp; + strncpy( p->argument, argument, len + 1 ); + ++ap->data_size; + return 1; + } + + +static char add_error( struct Arg_parser * const ap, const char * const msg ) + { + const int len = strlen( msg ); + void * tmp = ap_resize_buffer( ap->error, ap->error_size + len + 1 ); + if( !tmp ) return 0; + ap->error = (char *)tmp; + strncpy( ap->error + ap->error_size, msg, len + 1 ); + ap->error_size += len; + return 1; + } + + +static void free_data( struct Arg_parser * const ap ) + { + int i; + for( i = 0; i < ap->data_size; ++i ) free( ap->data[i].argument ); + if( ap->data ) { free( ap->data ); ap->data = 0; } + ap->data_size = 0; + } + + +static char parse_long_option( struct Arg_parser * const ap, + const char * const opt, const char * const arg, + const struct ap_Option options[], + int * const argindp ) + { + unsigned len; + int index = -1; + int i; + char exact = 0, ambig = 0; + + for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; + + /* Test all long options for either exact match or abbreviated matches. */ + for( i = 0; options[i].code != 0; ++i ) + if( options[i].name && !strncmp( options[i].name, &opt[2], len ) ) + { + if( strlen( options[i].name ) == len ) /* Exact match found */ + { index = i; exact = 1; break; } + else if( index < 0 ) index = i; /* First nonexact match found */ + else if( options[index].code != options[i].code || + options[index].has_arg != options[i].has_arg ) + ambig = 1; /* Second or later nonexact match found */ + } + + if( ambig && !exact ) + { + add_error( ap, "option '" ); add_error( ap, opt ); + add_error( ap, "' is ambiguous" ); + return 1; + } + + if( index < 0 ) /* nothing found */ + { + add_error( ap, "unrecognized option '" ); add_error( ap, opt ); + add_error( ap, "'" ); + return 1; + } + + ++*argindp; + + if( opt[len+2] ) /* '--=' syntax */ + { + if( options[index].has_arg == ap_no ) + { + add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "' doesn't allow an argument" ); + return 1; + } + if( options[index].has_arg == ap_yes && !opt[len+3] ) + { + add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "' requires an argument" ); + return 1; + } + return push_back_record( ap, options[index].code, &opt[len+3] ); + } + + if( options[index].has_arg == ap_yes ) + { + if( !arg || !arg[0] ) + { + add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "' requires an argument" ); + return 1; + } + ++*argindp; + return push_back_record( ap, options[index].code, arg ); + } + + return push_back_record( ap, options[index].code, "" ); + } + + +static char parse_short_option( struct Arg_parser * const ap, + const char * const opt, const char * const arg, + const struct ap_Option options[], + int * const argindp ) + { + int cind = 1; /* character index in opt */ + + while( cind > 0 ) + { + int index = -1; + int i; + const unsigned char code = opt[cind]; + char code_str[2]; + code_str[0] = code; code_str[1] = 0; + + if( code != 0 ) + for( i = 0; options[i].code; ++i ) + if( code == options[i].code ) + { index = i; break; } + + if( index < 0 ) + { + add_error( ap, "invalid option -- " ); add_error( ap, code_str ); + return 1; + } + + if( opt[++cind] == 0 ) { ++*argindp; cind = 0; } /* opt finished */ + + if( options[index].has_arg != ap_no && cind > 0 && opt[cind] ) + { + if( !push_back_record( ap, code, &opt[cind] ) ) return 0; + ++*argindp; cind = 0; + } + else if( options[index].has_arg == ap_yes ) + { + if( !arg || !arg[0] ) + { + add_error( ap, "option requires an argument -- " ); + add_error( ap, code_str ); + return 1; + } + ++*argindp; cind = 0; + if( !push_back_record( ap, code, arg ) ) return 0; + } + else if( !push_back_record( ap, code, "" ) ) return 0; + } + return 1; + } + + +char ap_init( struct Arg_parser * const ap, + const int argc, const char * const argv[], + const struct ap_Option options[], const char in_order ) + { + const char ** non_options = 0; /* skipped non-options */ + int non_options_size = 0; /* number of skipped non-options */ + int argind = 1; /* index in argv */ + int i; + + ap->data = 0; + ap->error = 0; + ap->data_size = 0; + ap->error_size = 0; + if( argc < 2 || !argv || !options ) return 1; + + while( argind < argc ) + { + const unsigned char ch1 = argv[argind][0]; + const unsigned char ch2 = ( ch1 ? argv[argind][1] : 0 ); + + if( ch1 == '-' && ch2 ) /* we found an option */ + { + const char * const opt = argv[argind]; + const char * const arg = (argind + 1 < argc) ? argv[argind+1] : 0; + if( ch2 == '-' ) + { + if( !argv[argind][2] ) { ++argind; break; } /* we found "--" */ + else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0; + } + else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0; + if( ap->error ) break; + } + else + { + if( !in_order ) + { + void * tmp = ap_resize_buffer( non_options, + ( non_options_size + 1 ) * sizeof *non_options ); + if( !tmp ) return 0; + non_options = (const char **)tmp; + non_options[non_options_size++] = argv[argind++]; + } + else if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; + } + } + if( ap->error ) free_data( ap ); + else + { + for( i = 0; i < non_options_size; ++i ) + if( !push_back_record( ap, 0, non_options[i] ) ) return 0; + while( argind < argc ) + if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; + } + if( non_options ) free( non_options ); + return 1; + } + + +void ap_free( struct Arg_parser * const ap ) + { + free_data( ap ); + if( ap->error ) { free( ap->error ); ap->error = 0; } + ap->error_size = 0; + } + + +const char * ap_error( const struct Arg_parser * const ap ) + { return ap->error; } + + +int ap_arguments( const struct Arg_parser * const ap ) + { return ap->data_size; } + + +int ap_code( const struct Arg_parser * const ap, const int i ) + { + if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].code; + else return 0; + } + + +const char * ap_argument( const struct Arg_parser * const ap, const int i ) + { + if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].argument; + else return ""; + } diff --git a/carg_parser.h b/carg_parser.h new file mode 100644 index 0000000..020c57c --- /dev/null +++ b/carg_parser.h @@ -0,0 +1,85 @@ +/* Pdlzip - Data compressor based on the LZMA algorithm + Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. + + This program is free software: you have unlimited permission + to copy, distribute and modify it. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +/* Arg_parser reads the arguments in 'argv' and creates a number of + option codes, option arguments and non-option arguments. + + In case of error, 'ap_error' returns a non-null pointer to an error + message. + + 'options' is an array of 'struct ap_Option' terminated by an element + containing a code which is zero. A null name means a short-only + option. A code value outside the unsigned char range means a + long-only option. + + Arg_parser normally makes it appear as if all the option arguments + were specified before all the non-option arguments for the purposes + of parsing, even if the user of your program intermixed option and + non-option arguments. If you want the arguments in the exact order + the user typed them, call 'ap_init' with 'in_order' = true. + + The argument '--' terminates all options; any following arguments are + treated as non-option arguments, even if they begin with a hyphen. + + The syntax for optional option arguments is '-' + (without whitespace), or '--='. +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +enum ap_Has_arg { ap_no, ap_yes, ap_maybe }; + +struct ap_Option + { + int code; /* Short option letter or code ( code != 0 ) */ + const char * name; /* Long option name (maybe null) */ + enum ap_Has_arg has_arg; + }; + + +struct ap_Record + { + int code; + char * argument; + }; + + +struct Arg_parser + { + struct ap_Record * data; + char * error; + int data_size; + int error_size; + }; + + +char ap_init( struct Arg_parser * const ap, + const int argc, const char * const argv[], + const struct ap_Option options[], const char in_order ); + +void ap_free( struct Arg_parser * const ap ); + +const char * ap_error( const struct Arg_parser * const ap ); + + /* The number of arguments parsed (may be different from argc) */ +int ap_arguments( const struct Arg_parser * const ap ); + + /* If ap_code( i ) is 0, ap_argument( i ) is a non-option. + Else ap_argument( i ) is the option's argument (or empty). */ +int ap_code( const struct Arg_parser * const ap, const int i ); + +const char * ap_argument( const struct Arg_parser * const ap, const int i ); + +#ifdef __cplusplus +} +#endif diff --git a/clzip.h b/clzip.h new file mode 100644 index 0000000..0ec2756 --- /dev/null +++ b/clzip.h @@ -0,0 +1,223 @@ +/* Pdlzip - Data compressor based on the LZMA algorithm + Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + + This program is free software: you have unlimited permission + to copy, distribute and modify it. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#ifndef __cplusplus +enum Bool { false = 0, true = 1 }; +typedef enum Bool bool; +#endif + +#ifndef max + #define max(x,y) ((x) >= (y) ? (x) : (y)) +#endif +#ifndef min + #define min(x,y) ((x) <= (y) ? (x) : (y)) +#endif + +typedef int State; + +enum { + min_dictionary_bits = 12, + min_dictionary_size = 1 << min_dictionary_bits, + max_dictionary_bits = 29, + max_dictionary_size = 1 << max_dictionary_bits, + literal_context_bits = 3, + pos_state_bits = 2, + + len_low_bits = 3, + len_mid_bits = 3, + len_high_bits = 8, + len_low_symbols = 1 << len_low_bits, + len_mid_symbols = 1 << len_mid_bits, + len_high_symbols = 1 << len_high_bits, + max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, + + min_match_len = 2, /* must be 2 */ + max_match_len = min_match_len + max_len_symbols - 1, /* 273 */ + min_match_len_limit = 5 }; + + +struct Pretty_print + { + const char * name; + const char * stdin_name; + int longest_name; + int verbosity; + bool first_post; + }; + +void Pp_init( struct Pretty_print * const pp, const char * const filenames[], + const int num_filenames, const int v ); + +static inline void Pp_set_name( struct Pretty_print * const pp, + const char * const filename ) + { + if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) + pp->name = filename; + else pp->name = pp->stdin_name; + pp->first_post = true; + } + +static inline void Pp_reset( struct Pretty_print * const pp ) + { if( pp->name && pp->name[0] ) pp->first_post = true; } +void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ); + + +typedef uint32_t CRC32[256]; /* Table of CRCs of all 8-bit messages. */ + +extern CRC32 crc32; + +static inline void CRC32_init( void ) + { + unsigned n; + for( n = 0; n < 256; ++n ) + { + unsigned c = n; + int k; + for( k = 0; k < 8; ++k ) + { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } + crc32[n] = c; + } + } + +static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) + { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } + +static inline void CRC32_update_buf( uint32_t * const crc, + const uint8_t * const buffer, const int size ) + { + int i; + for( i = 0; i < size; ++i ) + *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); + } + + +static inline int real_bits( unsigned value ) + { + int bits = 0; + while( value > 0 ) { value >>= 1; ++bits; } + return bits; + } + + +static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ + +typedef uint8_t File_header[6]; /* 0-3 magic bytes */ + /* 4 version */ + /* 5 coded_dict_size */ +enum { Fh_size = 6 }; + +static inline void Fh_set_magic( File_header data ) + { memcpy( data, magic_string, 4 ); data[4] = 1; } + +static inline bool Fh_verify_magic( const File_header data ) + { return ( memcmp( data, magic_string, 4 ) == 0 ); } + +static inline uint8_t Fh_version( const File_header data ) + { return data[4]; } + +static inline bool Fh_verify_version( const File_header data ) + { return ( data[4] <= 1 ); } + +static inline unsigned Fh_get_dictionary_size( const File_header data ) + { + unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); + return sz; + } + +static inline bool Fh_set_dictionary_size( File_header data, const int sz ) + { + if( sz >= min_dictionary_size && sz <= max_dictionary_size ) + { + data[5] = real_bits( sz - 1 ); + if( sz > min_dictionary_size ) + { + const int base_size = 1 << data[5]; + const int wedge = base_size / 16; + int i; + for( i = 7; i >= 1; --i ) + if( base_size - ( i * wedge ) >= sz ) + { data[5] |= ( i << 5 ); break; } + } + return true; + } + return false; + } + + +typedef uint8_t File_trailer[20]; + /* 0-3 CRC32 of the uncompressed data */ + /* 4-11 size of the uncompressed data */ + /* 12-19 member size including header and trailer */ + +enum { Ft_size = 20 }; + +static inline int Ft_versioned_size( const int version ) + { return ( ( version >= 1 ) ? 20 : 12 ); } + +static inline unsigned Ft_get_data_crc( const File_trailer data ) + { + unsigned tmp = 0; + int i; + for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_data_crc( File_trailer data, unsigned crc ) + { + int i; + for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } + } + +static inline unsigned long long Ft_get_data_size( const File_trailer data ) + { + unsigned long long tmp = 0; + int i; + for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_data_size( File_trailer data, unsigned long long sz ) + { + int i; + for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } + } + +static inline unsigned long long Ft_get_member_size( const File_trailer data ) + { + unsigned long long tmp = 0; + int i; + for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_member_size( File_trailer data, unsigned long long sz ) + { + int i; + for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } + } + + +/* defined in LzmaDec.c */ +int readblock( const int fd, uint8_t * const buf, const int size ); +int writeblock( const int fd, const uint8_t * const buf, const int size ); + +/* defined in main.c */ +extern int verbosity; + +void show_error( const char * const msg, const int errcode, const bool help ); +void internal_error( const char * const msg ); + +#define SZ_OK 0 + +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 diff --git a/configure b/configure index 889aff5..a9f31cb 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # configure script for Pdlzip - Data compressor based on the LZMA algorithm -# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. +# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -8,9 +8,9 @@ args= no_create= pkgname=pdlzip -pkgversion=1.3 +pkgversion=1.4-rc1 progname=pdlzip -srctrigger=pdlzip.h +srctrigger=clzip.h # clear some things potentially inherited from environment. LC_ALL=C @@ -19,15 +19,22 @@ srcdir= prefix=/usr/local exec_prefix='$(prefix)' bindir='$(exec_prefix)/bin' -datadir='$(prefix)/share' -infodir='$(datadir)/info' -mandir='$(datadir)/man' -sysconfdir='$(prefix)/etc' -CC= +datarootdir='$(prefix)/share' +infodir='$(datarootdir)/info' +mandir='$(datarootdir)/man' +CC=gcc CPPFLAGS= CFLAGS='-Wall -W -O2' LDFLAGS= +# checking whether we are using GNU C. +if [ ! -x /bin/gcc ] && + [ ! -x /usr/bin/gcc ] && + [ ! -x /usr/local/bin/gcc ] ; then + CC=cc + CFLAGS='-W -O2' +fi + # Loop over all args while [ -n "$1" ] ; do @@ -40,12 +47,12 @@ while [ -n "$1" ] ; do # Split out the argument for options that take them case ${option} in - *=*) optarg=`echo ${option} | sed -e 's,^[^=]*=,,'` ;; + *=*) optarg=`echo ${option} | sed -e 's,^[^=]*=,,;s,/$,,'` ;; esac # Process the options case ${option} in - --help | --he* | -h) + --help | -h) echo "Usage: configure [options]" echo echo "Options: [defaults in brackets]" @@ -55,42 +62,31 @@ while [ -n "$1" ] ; do echo " --prefix=DIR install into DIR [${prefix}]" echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" echo " --bindir=DIR user executables directory [${bindir}]" - echo " --datadir=DIR base directory for doc and data [${datadir}]" + echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]" echo " --infodir=DIR info files directory [${infodir}]" echo " --mandir=DIR man pages directory [${mandir}]" - echo " --sysconfdir=DIR read-only single-machine data directory [${sysconfdir}]" echo " CC=COMPILER C compiler to use [gcc]" echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" echo " CFLAGS=OPTIONS command line options for the C compiler [${CFLAGS}]" echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" echo exit 0 ;; - --version | --ve* | -V) + --version | -V) echo "Configure script for ${pkgname} version ${pkgversion}" exit 0 ;; - --srcdir* | --sr*) - srcdir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --prefix* | --pr*) - prefix=`echo ${optarg} | sed -e 's,/$,,'` ;; - --exec-prefix* | --ex*) - exec_prefix=`echo ${optarg} | sed -e 's,/$,,'` ;; - --bindir* | --bi*) - bindir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --datadir* | --da*) - datadir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --infodir* | --inf*) - infodir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --mandir* | --ma*) - mandir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --sysconfdir* | --sy*) - sysconfdir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --no-create | --no-c*) - no_create=yes ;; - - CC=*) CC=${optarg} ;; + --srcdir=*) srcdir=${optarg} ;; + --prefix=*) prefix=${optarg} ;; + --exec-prefix=*) exec_prefix=${optarg} ;; + --bindir=*) bindir=${optarg} ;; + --datarootdir=*) datarootdir=${optarg} ;; + --infodir=*) infodir=${optarg} ;; + --mandir=*) mandir=${optarg} ;; + --no-create) no_create=yes ;; + + CC=*) CC=${optarg} ;; CPPFLAGS=*) CPPFLAGS=${optarg} ;; - CFLAGS=*) CFLAGS=${optarg} ;; - LDFLAGS=*) LDFLAGS=${optarg} ;; + CFLAGS=*) CFLAGS=${optarg} ;; + LDFLAGS=*) LDFLAGS=${optarg} ;; --* | *=* | *-*-*) ;; *) @@ -103,14 +99,14 @@ done srcdirtext= if [ -z "${srcdir}" ] ; then srcdirtext="or . or .." ; srcdir=. - if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi - if [ ! -r ${srcdir}/${srctrigger} ] ; then + if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi + if [ ! -r "${srcdir}/${srctrigger}" ] ; then ## the sed command below emulates the dirname command srcdir=`echo $0 | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` fi fi -if [ ! -r ${srcdir}/${srctrigger} ] ; then +if [ ! -r "${srcdir}/${srctrigger}" ] ; then exec 1>&2 echo echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" @@ -119,18 +115,7 @@ if [ ! -r ${srcdir}/${srctrigger} ] ; then fi # Set srcdir to . if that's what it is. -if [ "`pwd`" = "`cd ${srcdir} ; pwd`" ] ; then srcdir=. ; fi - -# checking whether we are using GNU C. -if [ -z "${CC}" ] ; then # Let the user override the test. - if [ -x /bin/gcc ] || - [ -x /usr/bin/gcc ] || - [ -x /usr/local/bin/gcc ] ; then - CC="gcc" - else - CC="cc" - fi -fi +if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi echo if [ -z "${no_create}" ] ; then @@ -154,10 +139,9 @@ echo "VPATH = ${srcdir}" echo "prefix = ${prefix}" echo "exec_prefix = ${exec_prefix}" echo "bindir = ${bindir}" -echo "datadir = ${datadir}" +echo "datarootdir = ${datarootdir}" echo "infodir = ${infodir}" echo "mandir = ${mandir}" -echo "sysconfdir = ${sysconfdir}" echo "CC = ${CC}" echo "CPPFLAGS = ${CPPFLAGS}" echo "CFLAGS = ${CFLAGS}" @@ -165,7 +149,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Pdlzip - Data compressor based on the LZMA algorithm -# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. +# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # # This Makefile is free software: you have unlimited permission @@ -178,15 +162,14 @@ VPATH = ${srcdir} prefix = ${prefix} exec_prefix = ${exec_prefix} bindir = ${bindir} -datadir = ${datadir} +datarootdir = ${datarootdir} infodir = ${infodir} mandir = ${mandir} -sysconfdir = ${sysconfdir} CC = ${CC} CPPFLAGS = ${CPPFLAGS} CFLAGS = ${CFLAGS} LDFLAGS = ${LDFLAGS} EOF -cat ${srcdir}/Makefile.in >> Makefile +cat "${srcdir}/Makefile.in" >> Makefile echo "OK. Now you can run make." diff --git a/doc/pdlzip.1 b/doc/pdlzip.1 index 153a618..8833c31 100644 --- a/doc/pdlzip.1 +++ b/doc/pdlzip.1 @@ -1,10 +1,10 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH PDLZIP "1" "January 2012" "Pdlzip 1.3" "User Commands" +.TH PDLZIP "1" "February 2013" "Pdlzip 1.4-rc1" "User Commands" .SH NAME Pdlzip \- reduces the size of files .SH SYNOPSIS .B pdlzip -[\fIoptions\fR] [\fIfile\fR] +[\fIoptions\fR] [\fIfiles\fR] .SH DESCRIPTION Pdlzip \- A "public domain" version of the lzip data compressor also able to decompress legacy lzma\-alone (.lzma) files. @@ -22,9 +22,21 @@ send output to standard output \fB\-d\fR, \fB\-\-decompress\fR decompress .TP +\fB\-f\fR, \fB\-\-force\fR +overwrite existing output files +.TP +\fB\-F\fR, \fB\-\-recompress\fR +force recompression of compressed files +.TP +\fB\-k\fR, \fB\-\-keep\fR +keep (don't delete) input files +.TP \fB\-m\fR, \fB\-\-match\-length=\fR set match length limit in bytes [36] .TP +\fB\-o\fR, \fB\-\-output=\fR +if reading stdin, place the output into +.TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages .TP @@ -46,7 +58,7 @@ alias for \fB\-1\fR \fB\-\-best\fR alias for \fB\-9\fR .PP -If no file name is given, pdlzip compresses or decompresses +If no file names are given, pdlzip compresses or decompresses from standard input to standard output. Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... @@ -59,7 +71,7 @@ Report bugs to lzip\-bug@nongnu.org .br Pdlzip home page: http://www.nongnu.org/lzip/pdlzip.html .SH COPYRIGHT -Copyright \(co 2012 Antonio Diaz Diaz. +Copyright \(co 2013 Antonio Diaz Diaz. Public Domain 2009 Igor Pavlov. .br This is free software: you are free to change and redistribute it. diff --git a/main.c b/main.c index 353a1d9..cda4327 100644 --- a/main.c +++ b/main.c @@ -1,754 +1,1033 @@ -/* Pdlzip - Data compressor based on the LZMA algorithm - 2009-08-14 : Igor Pavlov : Public domain - Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. - - This program is free software: you have unlimited permission - to copy, distribute and modify it. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ - -#define _FILE_OFFSET_BITS 64 - -#include -#include -#include -#include -#include -#include -#include -#if defined(__OS2__) -#include -#endif - -#include "pdarg_parser.h" -#include "pdlzip.h" -#include "Alloc.h" -#include "7zFile.h" -#include "LzmaDec.h" -#include "LzmaEnc.h" - -#if CHAR_BIT != 8 -#error "Environments where CHAR_BIT != 8 are not supported." -#endif - -#ifndef LLONG_MAX -#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL -#endif -#ifndef LLONG_MIN -#define LLONG_MIN (-LLONG_MAX - 1LL) -#endif -#ifndef ULLONG_MAX -#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL -#endif - -long long int llabs( long long int number ); - - -static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); } -static void SzFree(void *p, void *address) { p = p; MyFree(address); } -static ISzAlloc g_Alloc = { SzAlloc, SzFree }; - -const char * const Program_name = "Pdlzip"; -const char * const program_name = "pdlzip"; -const char * const program_year = "2012"; -const char * invocation_name = 0; - - -struct { const char * from; const char * to; } const known_extensions[] = { - { ".lz", "" }, - { ".tlz", ".tar" }, - { ".lzma", "" }, - { 0, 0 } }; - -struct Lzma_options - { - int dictionary_size; /* 4KiB..512MiB */ - int match_len_limit; /* 5..273 */ - }; - -enum Mode { m_compress, m_decompress, m_test }; -char * output_filename = 0; - - -/* assure at least a minimum size for buffer 'buf' */ -inline void * resize_buffer( void * buf, const int min_size ) - { - if( buf ) buf = realloc( buf, min_size ); - else buf = malloc( min_size ); - return buf; - } - - -static void show_help() - { - printf( "%s - A \"public domain\" version of the lzip data compressor\n", Program_name ); - printf( "also able to decompress legacy lzma-alone (.lzma) files.\n" - "\nUsage: %s [options] [file]\n", invocation_name ); - printf( "\nOptions:\n" - " -h, --help display this help and exit\n" - " -V, --version output version information and exit\n" - " -c, --stdout send output to standard output\n" - " -d, --decompress decompress\n" -/* " -f, --force overwrite existing output files\n" */ -/* " -k, --keep keep (don't delete) input files\n" */ - " -m, --match-length= set match length limit in bytes [36]\n" - " -q, --quiet suppress all messages\n" - " -s, --dictionary-size= set dictionary size limit in bytes [8MiB]\n" - " -t, --test test compressed file integrity\n" - " -v, --verbose be verbose (a 2nd -v gives more)\n" - " -1 .. -9 set compression level [default 6]\n" - " --fast alias for -1\n" - " --best alias for -9\n" - "If no file name is given, pdlzip compresses or decompresses\n" - "from standard input to standard output.\n" - "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" - "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" - "The bidimensional parameter space of LZMA can't be mapped to a linear\n" - "scale optimal for all files. If your files are large, very repetitive,\n" - "etc, you may need to use the --match-length and --dictionary-size\n" - "options directly to achieve optimal performance.\n" - "\nReport bugs to lzip-bug@nongnu.org\n" - "Pdlzip home page: http://www.nongnu.org/lzip/pdlzip.html\n" ); - } - - -static void show_version() - { - printf( "%s %s\n", Program_name, PROGVERSION ); - printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); - printf( "Public Domain 2009 Igor Pavlov.\n" - "This is free software: you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n" ); - } - - -static const char * format_num( long long num ) - { - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - enum { buf_size = 16, factor = 1024 }; - static char buf[buf_size]; - const char *p = ""; - bool exact = ( num % factor == 0 ); - int i; - - for( i = 0; i < 8 && ( llabs( num ) > 9999 || - ( exact && llabs( num ) >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } - snprintf( buf, buf_size, "%lld %s", num, p ); - return buf; - } - - -static long long getnum( const char * const ptr, - const long long llimit, const long long ulimit ) - { - long long result; - char *tail; - - errno = 0; - result = strtoll( ptr, &tail, 0 ); - if( tail == ptr ) - { - show_error( "Bad or missing numerical argument.", 0, true ); - exit( 1 ); - } - - if( !errno && tail[0] ) - { - int factor = ( tail[1] == 'i' ) ? 1024 : 1000; - int exponent = 0, i; - bool bad_multiplier = false; - switch( tail[0] ) - { - case ' ': break; - case 'Y': exponent = 8; break; - case 'Z': exponent = 7; break; - case 'E': exponent = 6; break; - case 'P': exponent = 5; break; - case 'T': exponent = 4; break; - case 'G': exponent = 3; break; - case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; - break; - case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; - break; - default : bad_multiplier = true; - } - if( bad_multiplier ) - { - show_error( "Bad multiplier in numerical argument.", 0, true ); - exit( 1 ); - } - for( i = 0; i < exponent; ++i ) - { - if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; - else { errno = ERANGE; break; } - } - } - if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; - if( errno ) - { - show_error( "Numerical argument out of limits.", 0, false ); - exit( 1 ); - } - return result; - } - - -static int get_dict_size( const char * const arg ) - { - char *tail; - int bits = strtol( arg, &tail, 0 ); - if( bits >= min_dictionary_bits && - bits <= max_dictionary_bits && *tail == 0 ) - return ( 1 << bits ); - return getnum( arg, min_dictionary_size, max_dictionary_size ); - } - - -static void show_name( const char * const name ) - { - if( verbosity >= 1 ) - fprintf( stderr, " %s: ", ( name && name[0] ) ? name : "(stdin)" ); - } - - -#define IN_BUF_SIZE (1 << 16) -#define OUT_BUF_SIZE (1 << 16) - -static bool read_inbuf( ISeqInStream * const inStream, Byte inBuf[], - size_t * const inPos, size_t * const inSize ) - { - size_t rest; - if( *inPos >= *inSize ) *inSize = 0; - else if( *inPos > 0 ) - { - memmove( inBuf, inBuf + *inPos, *inSize - *inPos ); - *inSize -= *inPos; - } - *inPos = 0; - rest = IN_BUF_SIZE - *inSize; - if( rest > 0 ) - { - if( inStream->Read( inStream, inBuf + *inSize, &rest ) != 0 ) - { show_error( "Read error", errno, false ); return false; } - *inSize += rest; - } - return true; - } - - -static int lzma_Decode2( UInt64 unpackSize, CLzmaDec *state, - ISeqOutStream *outStream, ISeqInStream *inStream, - Byte inBuf[], size_t * const inPos, - size_t * const inSize, const bool testing ) - { - long long total_in = 13, total_out = 0; - Byte outBuf[OUT_BUF_SIZE]; - size_t outPos = 0; - const bool thereIsSize = (unpackSize != (UInt64)(Int64)-1); - LzmaDec_Init(state); - - for (;;) - { - SizeT inProcessed; - SizeT outProcessed = OUT_BUF_SIZE - outPos; - ELzmaFinishMode finishMode = LZMA_FINISH_ANY; - ELzmaStatus status; - - if( *inPos == *inSize && !read_inbuf( inStream, inBuf, inPos, inSize ) ) - return 1; - inProcessed = *inSize - *inPos; - if (thereIsSize && outProcessed > unpackSize) - { - outProcessed = (SizeT)unpackSize; - finishMode = LZMA_FINISH_END; - } - - if( LzmaDec_DecodeToBuf( state, outBuf + outPos, &outProcessed, - inBuf + *inPos, &inProcessed, finishMode, &status ) != 0 ) - { show_error( "Data error.", 0, false ); return 1; } - *inPos += inProcessed; - total_in += inProcessed; - outPos += outProcessed; - unpackSize -= outProcessed; - - if (outStream) - if (outStream->Write(outStream, outBuf, outPos) != outPos) - { show_error( "Can not write output file", errno, false ); return 1; } - - total_out += outPos; - outPos = 0; - - if( ( inProcessed == 0 && outProcessed == 0 ) || - ( thereIsSize && unpackSize == 0 ) ) - { - if( ( thereIsSize && unpackSize != 0 ) || - ( !thereIsSize && status != LZMA_STATUS_FINISHED_WITH_MARK ) ) - { show_error( "Data error.", 0, false ); return 1; } - if( verbosity >= 2 ) - fprintf( stderr, "lzma-alone, dictionary size %7sB. ", - format_num( state->prop.dicSize ) ); - if( verbosity >= 3 ) - fprintf( stderr, "uncompressed size %9lld, compressed size %8lld. ", - total_out, total_in ); - if( verbosity >= 1 ) - { if( testing ) fprintf( stderr, "(apparently) ok\n" ); - else fprintf( stderr, "(apparently) done\n" ); } - return 0; - } - } - } - - -static int Decode2( CLzmaDec *state, ISeqOutStream *outStream, - ISeqInStream *inStream, Byte inBuf[], size_t * const inPos, - size_t * const inSize, const int member_version, - const bool testing ) - { - long long total_in = Fh_size, total_out = 0; - Byte outBuf[OUT_BUF_SIZE]; - size_t outPos = 0; - uint32_t crc = 0xFFFFFFFFU; - LzmaDec_Init(state); - - for (;;) - { - SizeT inProcessed; - SizeT outProcessed = OUT_BUF_SIZE - outPos; - ELzmaFinishMode finishMode = LZMA_FINISH_ANY; - ELzmaStatus status; - - if( *inPos == *inSize && !read_inbuf( inStream, inBuf, inPos, inSize ) ) - return 1; - if( *inPos == *inSize ) - { show_error( "Unexpected EOF.", 0, false ); return 1; } - inProcessed = *inSize - *inPos; - - if( LzmaDec_DecodeToBuf( state, outBuf + outPos, &outProcessed, - inBuf + *inPos, &inProcessed, finishMode, &status ) != 0 ) - { show_error( "Data error.", 0, false ); return 1; } - *inPos += inProcessed; - total_in += inProcessed; - outPos += outProcessed; - - if (outStream) - if (outStream->Write(outStream, outBuf, outPos) != outPos) - { show_error( "Can not write output file", errno, false ); return 1; } - - CRC32_update_buf( &crc, outBuf, outPos ); - total_out += outPos; - outPos = 0; - - if (inProcessed == 0 && outProcessed == 0) - { - File_trailer trailer; - size_t i; - const size_t trailer_size = Ft_versioned_size( member_version ); - bool error = false; - - if( status != LZMA_STATUS_FINISHED_WITH_MARK ) - { show_error( "Data error.", 0, false ); return 1; } - if( *inSize - *inPos < trailer_size && - !read_inbuf( inStream, inBuf, inPos, inSize ) ) return 1; - if( *inSize - *inPos < trailer_size ) - { - error = true; - if( verbosity >= 0 ) - fprintf( stderr, "trailer truncated at trailer position %u;" - " some checks may fail.\n", - (unsigned int)(*inSize - *inPos) ); - for( i = *inSize - *inPos; i < trailer_size; ++i ) - inBuf[*inPos+i] = 0; - } - for( i = 0; i < trailer_size; ++i ) - trailer[i] = inBuf[(*inPos)++]; - total_in += trailer_size; - if( member_version == 0 ) Ft_set_member_size( trailer, total_in ); - if( Ft_get_data_crc( trailer ) != ( crc ^ 0xFFFFFFFFU ) ) - { - error = true; - if( verbosity >= 0 ) - fprintf( stderr, "crc mismatch; trailer says %08X, data crc is %08X.\n", - (unsigned int)Ft_get_data_crc( trailer ), - (unsigned int)( crc ^ 0xFFFFFFFFU ) ); - } - if( Ft_get_data_size( trailer ) != total_out ) - { - error = true; - if( verbosity >= 0 ) - fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld.\n", - Ft_get_data_size( trailer ), total_out ); - } - if( Ft_get_member_size( trailer ) != total_in ) - { - error = true; - if( verbosity >= 0 ) - fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld.\n", - Ft_get_member_size( trailer ), total_in ); - } - if( !error && verbosity >= 3 ) - fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ", - (unsigned int)Ft_get_data_crc( trailer ), - Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) ); - if( !error && verbosity >= 1 ) - { if( testing ) fprintf( stderr, "ok\n" ); - else fprintf( stderr, "done\n" ); } - if( error ) return 2; - return 0; - } - } - } - - -static int Decode( ISeqOutStream *outStream, ISeqInStream *inStream, - const char * const name, const bool testing ) - { - UInt64 unpackSize = 0; - CLzmaDec state; - File_header header; - Byte inBuf[IN_BUF_SIZE]; - size_t inPos = 0, inSize = 0; - int retval = 0; - bool lzma_mode = false; - bool first_member; - /* 5 bytes of LZMA properties and 8 bytes of uncompressed size */ - unsigned char props[LZMA_PROPS_SIZE+8]; - - for( first_member = true; ; first_member = false ) - { - int i; - if( inSize < Fh_size && - !read_inbuf( inStream, inBuf, &inPos, &inSize ) ) return 1; - if( inSize < Fh_size ) /* End Of File */ - { - if( !first_member ) break; - show_error( "Error reading member header.", 0, false ); return 1; - } - for( i = 0; i < Fh_size; ++i ) - props[i] = header[i] = inBuf[inPos++]; - if( !Fh_verify_magic( header ) ) - { - if( !first_member ) break; /* trailing garbage */ - if( inSize >= 13 - Fh_size ) /* try lzma-alone */ - { - for( i = Fh_size; i < 13; ++i ) props[i] = inBuf[inPos++]; - for( i = 0; i < 8; ++i ) - unpackSize += (UInt64)props[LZMA_PROPS_SIZE+i] << (i * 8); - if( ( props[12] == 0 || props[12] == 0xFF ) && props[12] == props[11] ) - lzma_mode = true; - } - if( !lzma_mode ) - { - show_error( "Bad magic number (file not in lzip format).", 0, false ); - return 2; - } - } - if( !first_member ) show_name( name ); - if( !lzma_mode ) - { - int ds, i; - if( !Fh_verify_version( header ) ) - { - if( verbosity >= 0 ) - fprintf( stderr, "version %d member format not supported, newer %s needed.\n", - Fh_version( header ), program_name ); - return 2; - } - if( Fh_get_dictionary_size( header ) < min_dictionary_size || - Fh_get_dictionary_size( header ) > max_dictionary_size ) - { - if( verbosity >= 0 ) - fprintf( stderr, "invalid dictionary size in member header.\n" ); - return 2; - } - - if( verbosity >= 2 ) - fprintf( stderr, "version %d, dictionary size %7sB. ", - Fh_version( header ), - format_num( Fh_get_dictionary_size( header ) ) ); - - props[0] = 93; /* (45 * 2) + (9 * 0) + 3 */ - ds = Fh_get_dictionary_size( header ); - for( i = 1; i <= 4; ++i ) { props[i] = ds & 0xFF; ds >>= 8; } - } - - LzmaDec_Construct(&state); - if( LzmaDec_Allocate( &state, props, LZMA_PROPS_SIZE, &g_Alloc ) != 0 ) - { show_error( "Can not allocate memory.", 0, false ); return 1; } - if( !lzma_mode ) - retval = Decode2( &state, outStream, inStream, inBuf, &inPos, - &inSize, Fh_version( header ), testing ); - else - retval = lzma_Decode2( unpackSize, &state, outStream, inStream, - inBuf, &inPos, &inSize, testing ); - LzmaDec_Free(&state, &g_Alloc); - if( retval != 0 || lzma_mode ) break; - } - return retval; - } - - -static int Encode( ISeqOutStream *outStream, ISeqInStream *inStream, - const struct Lzma_options * const encoder_options ) - { - CLzmaEncProps props; - int retval = 0; - File_header header; - - CLzmaEncHandle enc = LzmaEnc_Create(&g_Alloc); - if(enc == 0) - { show_error( "Can not allocate memory.", 0, false ); return 1; } - - LzmaEncProps_Init(&props); - props.dictSize = encoder_options->dictionary_size; - props.lc = literal_context_bits; - props.lp = 0; - props.pb = pos_state_bits; - props.fb = encoder_options->match_len_limit; - props.btMode = 1; - props.numHashBytes = 4; - props.mc = 16 + ( encoder_options->match_len_limit / 2 ); - LzmaEnc_SetProps(enc, &props); - - Fh_set_magic( header ); - if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) || - encoder_options->match_len_limit < min_match_len_limit || - encoder_options->match_len_limit > max_match_len ) - internal_error( "invalid argument to encoder" ); - - if( outStream->Write( outStream, header, Fh_size ) != Fh_size ) - { show_error( "Can not write output file", errno, false ); retval = 1; } - else - if( LzmaEnc_Encode(enc, outStream, inStream, NULL, &g_Alloc, &g_Alloc) != 0 ) - { show_error( "Data error.", 0, false ); retval = 1; } - LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc); - return retval; - } - - -int verbosity = 0; - - -void show_error( const char * const msg, const int errcode, const bool help ) - { - if( verbosity >= 0 ) - { - if( msg && msg[0] ) - { - fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); - fprintf( stderr, "\n" ); - } - if( help && invocation_name && invocation_name[0] ) - fprintf( stderr, "Try '%s --help' for more information.\n", - invocation_name ); - } - } - - -void internal_error( const char * const msg ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); - exit( 3 ); - } - - -static int extension_index( const char * const name ) - { - int i; - for( i = 0; known_extensions[i].from; ++i ) - { - const char * const ext = known_extensions[i].from; - if( strlen( name ) > strlen( ext ) && - strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 ) - return i; - } - return -1; - } - - -static void set_c_outname( const char * const name ) - { - output_filename = resize_buffer( output_filename, strlen( name ) + - strlen( known_extensions[0].from ) + 1 ); - strcpy( output_filename, name ); - strcat( output_filename, known_extensions[0].from ); - } - - -static void set_d_outname( const char * const name ) - { - const int i = extension_index( name ); - if( i >= 0 ) - { - const char * const from = known_extensions[i].from; - if( strlen( name ) > strlen( from ) ) - { - output_filename = resize_buffer( output_filename, strlen( name ) + - strlen( known_extensions[0].to ) + 1 ); - strcpy( output_filename, name ); - strcpy( output_filename + strlen( name ) - strlen( from ), - known_extensions[i].to ); - return; - } - } - output_filename = resize_buffer( output_filename, strlen( name ) + 4 + 1 ); - strcpy( output_filename, name ); - strcat( output_filename, ".out" ); - if( verbosity >= 1 ) - fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n", - program_name, name, output_filename ); - } - - -CRC32 crc32; - - -int main( const int argc, const char * const argv[] ) - { - /* Mapping from gzip/bzip2 style 1..9 compression modes - to the corresponding LZMA compression modes. */ - const struct Lzma_options option_mapping[] = - { - { 1 << 20, 5 }, /* -0 */ - { 1 << 20, 5 }, /* -1 */ - { 3 << 19, 6 }, /* -2 */ - { 1 << 21, 8 }, /* -3 */ - { 3 << 20, 12 }, /* -4 */ - { 1 << 22, 20 }, /* -5 */ - { 1 << 23, 36 }, /* -6 */ - { 1 << 24, 68 }, /* -7 */ - { 3 << 23, 132 }, /* -8 */ - { 1 << 25, 273 } }; /* -9 */ - struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ - enum Mode program_mode = m_compress; - const char * input_filename = ""; - CFileSeqInStream inStream; - CFileOutStream outStream; - int argind; - int retval; - bool force = false; - bool keep_input_files = false; - bool to_stdout = false; - - const struct ap_Option options[] = - { - { '0', 0, ap_no }, - { '1', "fast", ap_no }, - { '2', 0, ap_no }, - { '3', 0, ap_no }, - { '4', 0, ap_no }, - { '5', 0, ap_no }, - { '6', 0, ap_no }, - { '7', 0, ap_no }, - { '8', 0, ap_no }, - { '9', "best", ap_no }, - { 'b', "member-size", ap_yes }, - { 'c', "stdout", ap_no }, - { 'd', "decompress", ap_no }, - { 'f', "force", ap_no }, - { 'h', "help", ap_no }, - { 'k', "keep", ap_no }, - { 'm', "match-length", ap_yes }, - { 'q', "quiet", ap_no }, - { 's', "dictionary-size", ap_yes }, - { 'S', "volume-size", ap_yes }, - { 't', "test", ap_no }, - { 'v', "verbose", ap_no }, - { 'V', "version", ap_no }, - { 0 , 0, ap_no } }; - - struct Arg_parser parser; - - invocation_name = argv[0]; - CRC32_init(); - if (sizeof(UInt32) != 4 || sizeof(UInt64) != 8) - internal_error( "incorrect UInt32 or UInt64" ); - - if( !ap_init( &parser, argc, argv, options, 0 ) ) - { show_error( "Memory exhausted.", 0, false ); return 1; } - if( ap_error( &parser ) ) /* bad option */ - { show_error( ap_error( &parser ), 0, true ); return 1; } - - for( argind = 0; argind < ap_arguments( &parser ); ++argind ) - { - const int code = ap_code( &parser, argind ); - const char * const arg = ap_argument( &parser, argind ); - if( !code ) break; /* no more options */ - switch( code ) - { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - encoder_options = option_mapping[code-'0']; break; - case 'b': break; - case 'c': to_stdout = true; break; - case 'd': program_mode = m_decompress; break; - case 'e': break; - case 'f': force = true; break; - case 'h': show_help(); return 0; - case 'k': keep_input_files = true; break; - case 'm': encoder_options.match_len_limit = - getnum( arg, min_match_len_limit, max_match_len ); break; - case 'q': verbosity = -1; break; - case 's': encoder_options.dictionary_size = get_dict_size( arg ); - break; - case 'S': break; - case 't': program_mode = m_test; break; - case 'v': if( verbosity < 4 ) ++verbosity; break; - case 'V': show_version(); return 0; - default : internal_error( "uncaught option" ); - } - } /* end process options */ - -#if defined(__OS2__) - _fsetmode( stdin, "b" ); - _fsetmode( stdout, "b" ); -#endif - - if( ap_arguments( &parser ) > argind && - strcmp( ap_argument( &parser, argind ), "-" ) ) - input_filename = ap_argument( &parser, argind ); - if( ap_arguments( &parser ) > argind + 1 ) - { show_error( "Too many file names.", 0, true ); return 1; } - - if( program_mode == m_test ) output_filename = "/dev/null"; - else - { - if( to_stdout || !input_filename[0] ) output_filename = ""; - else - { - if( program_mode == m_compress ) set_c_outname( input_filename ); - else set_d_outname( input_filename ); - } - } - - FileSeqInStream_CreateVTable(&inStream); - File_Construct(&inStream.file); - - FileOutStream_CreateVTable(&outStream); - File_Construct(&outStream.file); - - if (InFile_Open(&inStream.file, input_filename) != 0) - { show_error( "Can not open input file", errno, false ); return 1; } - - if (OutFile_Open(&outStream.file, output_filename) != 0) - { show_error( "Can not open output file", errno, false ); return 1; } - - show_name( input_filename ); - if( program_mode == m_compress ) - retval = Encode( &outStream.s, &inStream.s, &encoder_options ); - else - retval = Decode( &outStream.s, &inStream.s, input_filename, program_mode == m_test ); - - File_Close(&outStream.file); - File_Close(&inStream.file); - - ap_free( &parser ); - return retval; - } +/* Pdlzip - Data compressor based on the LZMA algorithm + 2009-08-14 : Igor Pavlov : Public domain + Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + + This program is free software: you have unlimited permission + to copy, distribute and modify it. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +/* + Return values: 0 for a normal exit, 1 for environmental problems + (file not found, invalid flags, I/O errors, etc), 2 to indicate a + corrupt or invalid input file, 3 for an internal consistency error + (eg, bug) which caused pdlzip to panic. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__MSVCRT__) +#include +#define fchmod(x,y) 0 +#define fchown(x,y,z) 0 +#define SIGHUP SIGTERM +#define S_ISSOCK(x) 0 +#define S_IRGRP 0 +#define S_IWGRP 0 +#define S_IROTH 0 +#define S_IWOTH 0 +#endif +#if defined(__OS2__) +#include +#endif + +#include "carg_parser.h" +#include "clzip.h" +#include "LzmaDec.h" +#include "LzmaEnc.h" + +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + + +const char * const Program_name = "Pdlzip"; +const char * const program_name = "pdlzip"; +const char * const program_year = "2013"; +const char * invocation_name = 0; + +#ifdef O_BINARY +const int o_binary = O_BINARY; +#else +const int o_binary = 0; +#endif + +struct { const char * from; const char * to; } const known_extensions[] = { + { ".lz", "" }, + { ".tlz", ".tar" }, + { ".lzma", "" }, + { 0, 0 } }; + +struct Lzma_options + { + int dictionary_size; /* 4KiB..512MiB */ + int match_len_limit; /* 5..273 */ + }; + +enum Mode { m_compress, m_decompress, m_test }; + +char * output_filename = 0; +int outfd = -1; +int verbosity = 0; +const mode_t usr_rw = S_IRUSR | S_IWUSR; +const mode_t all_rw = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; +mode_t outfd_mode = S_IRUSR | S_IWUSR; +bool delete_output_on_interrupt = false; + + +static void show_help( void ) + { + printf( "%s - A \"public domain\" version of the lzip data compressor\n", Program_name ); + printf( "also able to decompress legacy lzma-alone (.lzma) files.\n" + "\nUsage: %s [options] [files]\n", invocation_name ); + printf( "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -c, --stdout send output to standard output\n" + " -d, --decompress decompress\n" + " -f, --force overwrite existing output files\n" + " -F, --recompress force recompression of compressed files\n" + " -k, --keep keep (don't delete) input files\n" + " -m, --match-length= set match length limit in bytes [36]\n" + " -o, --output= if reading stdin, place the output into \n" + " -q, --quiet suppress all messages\n" + " -s, --dictionary-size= set dictionary size limit in bytes [8MiB]\n" + " -t, --test test compressed file integrity\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -1 .. -9 set compression level [default 6]\n" + " --fast alias for -1\n" + " --best alias for -9\n" + "If no file names are given, pdlzip compresses or decompresses\n" + "from standard input to standard output.\n" + "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" + "The bidimensional parameter space of LZMA can't be mapped to a linear\n" + "scale optimal for all files. If your files are large, very repetitive,\n" + "etc, you may need to use the --match-length and --dictionary-size\n" + "options directly to achieve optimal performance.\n" + "\nReport bugs to lzip-bug@nongnu.org\n" + "Pdlzip home page: http://www.nongnu.org/lzip/pdlzip.html\n" ); + } + + +static void show_version( void ) + { + printf( "%s %s\n", Program_name, PROGVERSION ); + printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + printf( "Public Domain 2009 Igor Pavlov.\n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +static const char * format_num( unsigned num ) + { + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + enum { buf_size = 16, factor = 1024 }; + static char buf[buf_size]; + const char * p = ""; + bool exact = ( num % factor == 0 ); + int i; + + for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } + snprintf( buf, buf_size, "%u %s", num, p ); + return buf; + } + + +void show_header( const File_header header ) + { + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + enum { factor = 1024 }; + const char * p = ""; + const char * np = " "; + unsigned num = Fh_get_dictionary_size( header ), i; + bool exact = ( num % factor == 0 ); + + for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + fprintf( stderr, "version %d, dictionary size %s%4u %sB. ", + Fh_version( header ), np, num, p ); + } + + +static unsigned long getnum( const char * const ptr, + const unsigned long llimit, + const unsigned long ulimit ) + { + unsigned long result; + char * tail; + errno = 0; + result = strtoul( ptr, &tail, 0 ); + if( tail == ptr ) + { + show_error( "Bad or missing numerical argument.", 0, true ); + exit( 1 ); + } + + if( !errno && tail[0] ) + { + int factor = ( tail[1] == 'i' ) ? 1024 : 1000; + int exponent = 0, i; + bool bad_multiplier = false; + switch( tail[0] ) + { + case ' ': break; + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; + break; + case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; + break; + default : bad_multiplier = true; + } + if( bad_multiplier ) + { + show_error( "Bad multiplier in numerical argument.", 0, true ); + exit( 1 ); + } + for( i = 0; i < exponent; ++i ) + { + if( ulimit / factor >= result ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + show_error( "Numerical argument out of limits.", 0, false ); + exit( 1 ); + } + return result; + } + + +static int get_dict_size( const char * const arg ) + { + char * tail; + int bits = strtol( arg, &tail, 0 ); + if( bits >= min_dictionary_bits && + bits <= max_dictionary_bits && *tail == 0 ) + return ( 1 << bits ); + return getnum( arg, min_dictionary_size, max_dictionary_size ); + } + + +static int extension_index( const char * const name ) + { + int i; + for( i = 0; known_extensions[i].from; ++i ) + { + const char * const ext = known_extensions[i].from; + if( strlen( name ) > strlen( ext ) && + strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 ) + return i; + } + return -1; + } + + +static int open_instream( const char * const name, struct stat * const in_statsp, + const enum Mode program_mode, const int eindex, + const bool recompress, const bool to_stdout ) + { + int infd = -1; + if( program_mode == m_compress && !recompress && eindex >= 0 ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", + program_name, name, known_extensions[eindex].from ); + } + else + { + infd = open( name, O_RDONLY | o_binary ); + if( infd < 0 ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: Can't open input file '%s': %s.\n", + program_name, name, strerror( errno ) ); + } + else + { + const int i = fstat( infd, in_statsp ); + const mode_t mode = in_statsp->st_mode; + const bool can_read = ( i == 0 && + ( S_ISBLK( mode ) || S_ISCHR( mode ) || + S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); + if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", + program_name, name, + ( can_read && !to_stdout ) ? + " and '--stdout' was not specified" : "" ); + close( infd ); + infd = -1; + } + } + } + return infd; + } + + +void cleanup_and_fail( const int retval ) + { + if( delete_output_on_interrupt ) + { + delete_output_on_interrupt = false; + if( verbosity >= 0 ) + fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", + program_name, output_filename ); + if( outfd >= 0 ) { close( outfd ); outfd = -1; } + if( remove( output_filename ) != 0 && errno != ENOENT ) + show_error( "WARNING: deletion of output file (apparently) failed.", 0, false ); + } + exit( retval ); + } + + +/* assure at least a minimum size for buffer 'buf' */ +static void * resize_buffer( void * buf, const int min_size ) + { + if( buf ) buf = realloc( buf, min_size ); + else buf = malloc( min_size ); + if( !buf ) + { + show_error( "Not enough memory.", 0, false ); + cleanup_and_fail( 1 ); + } + return buf; + } + + +static void set_c_outname( const char * const name ) + { + output_filename = resize_buffer( output_filename, strlen( name ) + + strlen( known_extensions[0].from ) + 1 ); + strcpy( output_filename, name ); + strcat( output_filename, known_extensions[0].from ); + } + + +static void set_d_outname( const char * const name, const int i ) + { + if( i >= 0 ) + { + const char * const from = known_extensions[i].from; + if( strlen( name ) > strlen( from ) ) + { + output_filename = resize_buffer( output_filename, strlen( name ) + + strlen( known_extensions[0].to ) + 1 ); + strcpy( output_filename, name ); + strcpy( output_filename + strlen( name ) - strlen( from ), + known_extensions[i].to ); + return; + } + } + output_filename = resize_buffer( output_filename, strlen( name ) + 4 + 1 ); + strcpy( output_filename, name ); + strcat( output_filename, ".out" ); + if( verbosity >= 1 ) + fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n", + program_name, name, output_filename ); + } + + +static bool open_outstream( const bool force ) + { + int flags = O_CREAT | O_WRONLY | o_binary; + if( force ) flags |= O_TRUNC; else flags |= O_EXCL; + + outfd = open( output_filename, flags, outfd_mode ); + if( outfd < 0 && verbosity >= 0 ) + { + if( errno == EEXIST ) + fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", + program_name, output_filename ); + else + fprintf( stderr, "%s: Can't create output file '%s': %s.\n", + program_name, output_filename, strerror( errno ) ); + } + return ( outfd >= 0 ); + } + + +static bool check_tty( const int infd, const enum Mode program_mode ) + { + if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) ) + { + show_error( "I won't write compressed data to a terminal.", 0, true ); + return false; + } + if( ( program_mode == m_decompress || program_mode == m_test ) && + isatty( infd ) ) + { + show_error( "I won't read compressed data from a terminal.", 0, true ); + return false; + } + return true; + } + + + /* Set permissions, owner and times. */ +static void close_and_set_permissions( const struct stat * const in_statsp ) + { + bool warning = false; + if( in_statsp ) + { + /* fchown will in many cases return with EPERM, which can be safely ignored. */ + if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 && + errno != EPERM ) || + fchmod( outfd, in_statsp->st_mode ) != 0 ) warning = true; + } + if( close( outfd ) != 0 ) cleanup_and_fail( 1 ); + outfd = -1; + delete_output_on_interrupt = false; + if( in_statsp ) + { + struct utimbuf t; + t.actime = in_statsp->st_atime; + t.modtime = in_statsp->st_mtime; + if( utime( output_filename, &t ) != 0 ) warning = true; + } + if( warning && verbosity >= 1 ) + show_error( "Can't change output file attributes.", 0, false ); + } + + +static int compress( const struct Lzma_options * const encoder_options, + const int infd, struct Pretty_print * const pp ) + { + int retval = 0; + File_header header; + CLzmaEncHandle encoder; + + if( verbosity >= 1 ) Pp_show_msg( pp, 0 ); + Fh_set_magic( header ); + if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) || + encoder_options->match_len_limit < min_match_len_limit || + encoder_options->match_len_limit > max_match_len ) + internal_error( "invalid argument to encoder" ); + + encoder = LzmaEnc_Init( Fh_get_dictionary_size( header ), + encoder_options->match_len_limit, infd, outfd ); + if( !encoder ) + { + Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size" ); + return 1; + } + + if( writeblock( outfd, header, Fh_size ) != Fh_size ) + { show_error( "Can not write output file", errno, false ); retval = 1; } + else + if( LzmaEnc_Encode( encoder ) != 0 ) + { Pp_show_msg( pp, "Encoder error" ); retval = 1; } + LzmaEnc_Free( encoder ); + return retval; + } + + +#define IN_BUF_SIZE (1 << 16) +#define OUT_BUF_SIZE (1 << 16) + +static bool read_inbuf( const int infd, uint8_t inBuf[], + int * const inPos, int * const inSize ) + { + int rest; + if( *inPos >= *inSize ) *inSize = 0; + else if( *inPos > 0 ) + { + memmove( inBuf, inBuf + *inPos, *inSize - *inPos ); + *inSize -= *inPos; + } + *inPos = 0; + rest = IN_BUF_SIZE - *inSize; + if( rest > 0 ) + { + const int rd = readblock( infd, inBuf + *inSize, rest ); + if( rd < rest && errno ) + { show_error( "Read error", errno, false ); return false; } + *inSize += rd; + } + return true; + } + + +static int lzma_decode( uint64_t unpackSize, CLzmaDec *decoder, const int infd, + uint8_t inBuf[], int * const inPos, + int * const inSize, const bool testing ) + { + unsigned long long total_in = 13, total_out = 0; + uint8_t outBuf[OUT_BUF_SIZE]; + int outPos = 0; + const bool thereIsSize = (unpackSize != (uint64_t)-1); + + for (;;) + { + uint32_t inProcessed; + uint32_t outProcessed = OUT_BUF_SIZE - outPos; + ELzmaFinishMode finishMode = LZMA_FINISH_ANY; + ELzmaStatus status; + + if( *inPos >= *inSize && !read_inbuf( infd, inBuf, inPos, inSize ) ) + return 1; + inProcessed = *inSize - *inPos; + if (thereIsSize && outProcessed > unpackSize) + { + outProcessed = unpackSize; + finishMode = LZMA_FINISH_END; + } + + if( !LzmaDec_DecodeToBuf( decoder, outBuf + outPos, &outProcessed, + inBuf + *inPos, &inProcessed, finishMode, &status ) ) + { show_error( "Data error.", 0, false ); return 1; } + *inPos += inProcessed; + total_in += inProcessed; + outPos += outProcessed; + unpackSize -= outProcessed; + + if( outfd >= 0 && writeblock( outfd, outBuf, outPos ) != outPos ) + { show_error( "Can not write output file", errno, false ); return 1; } + + total_out += outPos; + outPos = 0; + + if( ( inProcessed == 0 && outProcessed == 0 ) || + ( thereIsSize && unpackSize == 0 ) ) + { + if( ( thereIsSize && unpackSize != 0 ) || + ( !thereIsSize && status != LZMA_STATUS_FINISHED_WITH_MARK ) ) + { show_error( "Data error.", 0, false ); return 1; } + if( verbosity >= 2 ) + fprintf( stderr, "lzma-alone, dictionary size %7sB. ", + format_num( decoder->dicBufSize ) ); + if( verbosity >= 3 ) + fprintf( stderr, "uncompressed size %9llu, compressed size %8llu. ", + total_out, total_in ); + if( verbosity >= 1 ) + { if( testing ) fprintf( stderr, "(apparently) ok\n" ); + else fprintf( stderr, "(apparently) done\n" ); } + return 0; + } + } + } + + +static int lzip_decode( CLzmaDec *decoder, const int infd, uint8_t inBuf[], + int * const inPos, int * const inSize, + const int member_version ) + { + unsigned long long total_in = Fh_size, total_out = 0; + uint8_t outBuf[OUT_BUF_SIZE]; + int outPos = 0; + uint32_t crc = 0xFFFFFFFFU; + + for (;;) + { + uint32_t inProcessed; + uint32_t outProcessed = OUT_BUF_SIZE - outPos; + ELzmaFinishMode finishMode = LZMA_FINISH_ANY; + ELzmaStatus status; + + if( *inPos >= *inSize && !read_inbuf( infd, inBuf, inPos, inSize ) ) + return 1; + if( *inPos >= *inSize ) + { show_error( "Unexpected EOF.", 0, false ); return 1; } + inProcessed = *inSize - *inPos; + + if( !LzmaDec_DecodeToBuf( decoder, outBuf + outPos, &outProcessed, + inBuf + *inPos, &inProcessed, finishMode, &status ) ) + { show_error( "Data error.", 0, false ); return 1; } + *inPos += inProcessed; + total_in += inProcessed; + outPos += outProcessed; + + if( outfd >= 0 && writeblock( outfd, outBuf, outPos ) != outPos ) + { show_error( "Can not write output file", errno, false ); return 1; } + + CRC32_update_buf( &crc, outBuf, outPos ); + total_out += outPos; + outPos = 0; + + if (inProcessed == 0 && outProcessed == 0) + { + File_trailer trailer; + int i; + const int trailer_size = Ft_versioned_size( member_version ); + bool error = false; + + if( status != LZMA_STATUS_FINISHED_WITH_MARK ) + { show_error( "Data error.", 0, false ); return 1; } + if( *inSize - *inPos < trailer_size && + !read_inbuf( infd, inBuf, inPos, inSize ) ) return 1; + if( *inSize - *inPos < trailer_size ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "Trailer truncated at trailer position %u;" + " some checks may fail.\n", + (unsigned)(*inSize - *inPos) ); + for( i = *inSize - *inPos; i < trailer_size; ++i ) + inBuf[*inPos+i] = 0; + } + for( i = 0; i < trailer_size; ++i ) + trailer[i] = inBuf[(*inPos)++]; + total_in += trailer_size; + if( member_version == 0 ) Ft_set_member_size( trailer, total_in ); + if( Ft_get_data_crc( trailer ) != ( crc ^ 0xFFFFFFFFU ) ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "CRC mismatch; trailer says %08X, data crc is %08X.\n", + Ft_get_data_crc( trailer ), crc ^ 0xFFFFFFFFU ); + } + if( Ft_get_data_size( trailer ) != total_out ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n", + Ft_get_data_size( trailer ), total_out, total_out ); + } + if( Ft_get_member_size( trailer ) != total_in ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n", + Ft_get_member_size( trailer ), total_in, total_in ); + } + if( !error && verbosity >= 3 && total_out > 0 && total_in > 0 ) + fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", + (double)total_out / total_in, + ( 8.0 * total_in ) / total_out, + 100.0 * ( 1.0 - ( (double)total_in / total_out ) ) ); + if( !error && verbosity >= 4 ) + fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", + Ft_get_data_crc( trailer ), + Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) ); + if( error ) return 2; + return 0; + } + } + } + + +static int decompress( const int infd, struct Pretty_print * const pp, + const bool testing ) + { + uint64_t unpackSize = 0; + CLzmaDec decoder; + uint8_t inBuf[IN_BUF_SIZE]; + int inPos = 0, inSize = 0; + int retval = 0; + bool lzip_mode = true; + bool first_member; + /* 5 bytes of LZMA properties and 8 bytes of uncompressed size */ + uint8_t raw_props[LZMA_PROPS_SIZE+8]; + + for( first_member = true; ; first_member = false ) + { + int i; + File_header header; + if( inSize - inPos < Fh_size && + !read_inbuf( infd, inBuf, &inPos, &inSize ) ) return 1; + if( inSize - inPos < Fh_size ) /* End Of File */ + { + if( first_member ) + { Pp_show_msg( pp, "Error reading member header" ); retval = 1; } + break; + } + for( i = 0; i < Fh_size; ++i ) raw_props[i] = header[i] = inBuf[inPos++]; + if( !Fh_verify_magic( header ) ) + { + if( !first_member ) break; /* trailing garbage */ + if( inSize - inPos >= 13 - Fh_size ) /* try lzma-alone */ + { + for( i = Fh_size; i < 13; ++i ) raw_props[i] = inBuf[inPos++]; + for( i = 0; i < 8; ++i ) + unpackSize += (uint64_t)raw_props[LZMA_PROPS_SIZE+i] << (i * 8); + if( ( raw_props[12] == 0 || raw_props[12] == 0xFF ) && + raw_props[12] == raw_props[11] && raw_props[0] < (9 * 5 * 5) ) + lzip_mode = false; + } + if( lzip_mode ) + { + Pp_show_msg( pp, "Bad magic number (file not in lzip format)" ); + retval = 2; break; + } + } + if( lzip_mode ) + { + int ds, i; + if( !Fh_verify_version( header ) ) + { + if( verbosity >= 0 ) + { Pp_show_msg( pp, 0 ); + fprintf( stderr, "Version %d member format not supported.\n", + Fh_version( header ) ); } + retval = 2; break; + } + if( Fh_get_dictionary_size( header ) < min_dictionary_size || + Fh_get_dictionary_size( header ) > max_dictionary_size ) + { Pp_show_msg( pp, "Invalid dictionary size in member header" ); + retval = 2; break; } + + raw_props[0] = 93; /* (45 * 2) + (9 * 0) + 3 */ + ds = Fh_get_dictionary_size( header ); + for( i = 1; i <= 4; ++i ) { raw_props[i] = ds & 0xFF; ds >>= 8; } + } + + if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) + { + Pp_show_msg( pp, 0 ); + if( lzip_mode && verbosity >= 2 ) show_header( header ); + } + + if( !LzmaDec_Init( &decoder, raw_props ) ) + { + show_error( "Not enough memory. Find a machine with more memory.", 0, false ); + cleanup_and_fail( 1 ); + } + if( lzip_mode ) + retval = lzip_decode( &decoder, infd, inBuf, &inPos, &inSize, + Fh_version( header ) ); + else + retval = lzma_decode( unpackSize, &decoder, infd, + inBuf, &inPos, &inSize, testing ); + LzmaDec_Free(&decoder); + if( retval != 0 || !lzip_mode ) break; + if( verbosity >= 2 ) + { if( testing ) fprintf( stderr, "ok\n" ); + else fprintf( stderr, "done\n" ); Pp_reset( pp ); } + } + if( lzip_mode && verbosity == 1 && retval == 0 ) + { if( testing ) fprintf( stderr, "ok\n" ); + else fprintf( stderr, "done\n" ); } + return retval; + } + + +void signal_handler( int sig ) + { + if( sig ) {} /* keep compiler happy */ + show_error( "Control-C or similar caught, quitting.", 0, false ); + cleanup_and_fail( 1 ); + } + + +static void set_signals( void ) + { + signal( SIGHUP, signal_handler ); + signal( SIGINT, signal_handler ); + signal( SIGTERM, signal_handler ); + } + + +void Pp_init( struct Pretty_print * const pp, const char * const filenames[], + const int num_filenames, const int v ) + { + unsigned stdin_name_len; + int i; + pp->name = 0; + pp->stdin_name = "(stdin)"; + pp->longest_name = 0; + pp->verbosity = v; + pp->first_post = false; + stdin_name_len = strlen( pp->stdin_name ); + + for( i = 0; i < num_filenames; ++i ) + { + const char * const s = filenames[i]; + const int len = ( (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ) ); + if( len > pp->longest_name ) pp->longest_name = len; + } + if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; + } + + +void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) + { + if( verbosity >= 0 ) + { + if( pp->first_post ) + { + int i, len; + pp->first_post = false; + fprintf( stderr, " %s: ", pp->name ); + len = pp->longest_name - strlen( pp->name ); + for( i = 0; i < len; ++i ) fprintf( stderr, " " ); + if( !msg ) fflush( stderr ); + } + if( msg ) fprintf( stderr, "%s.\n", msg ); + } + } + + +void show_error( const char * const msg, const int errcode, const bool help ) + { + if( verbosity >= 0 ) + { + if( msg && msg[0] ) + { + fprintf( stderr, "%s: %s", program_name, msg ); + if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); + fprintf( stderr, "\n" ); + } + if( help ) + fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + } + + +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); + exit( 3 ); + } + + +int main( const int argc, const char * const argv[] ) + { + /* Mapping from gzip/bzip2 style 1..9 compression modes + to the corresponding LZMA compression modes. */ + const struct Lzma_options option_mapping[] = + { + { 1 << 20, 5 }, /* -0 */ + { 1 << 20, 5 }, /* -1 */ + { 3 << 19, 6 }, /* -2 */ + { 1 << 21, 8 }, /* -3 */ + { 3 << 20, 12 }, /* -4 */ + { 1 << 22, 20 }, /* -5 */ + { 1 << 23, 36 }, /* -6 */ + { 1 << 24, 68 }, /* -7 */ + { 3 << 23, 132 }, /* -8 */ + { 1 << 25, 273 } }; /* -9 */ + struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ + const char * input_filename = ""; + const char * default_output_filename = ""; + const char ** filenames = 0; + int num_filenames = 0; + int infd = -1; + enum Mode program_mode = m_compress; + int argind = 0; + int retval = 0; + int i; + bool filenames_given = false; + bool force = false; + bool keep_input_files = false; + bool recompress = false; + bool to_stdout = false; + struct Pretty_print pp; + + const struct ap_Option options[] = + { + { '0', "fast", ap_no }, + { '1', 0, ap_no }, + { '2', 0, ap_no }, + { '3', 0, ap_no }, + { '4', 0, ap_no }, + { '5', 0, ap_no }, + { '6', 0, ap_no }, + { '7', 0, ap_no }, + { '8', 0, ap_no }, + { '9', "best", ap_no }, + { 'b', "member-size", ap_yes }, + { 'c', "stdout", ap_no }, + { 'd', "decompress", ap_no }, + { 'f', "force", ap_no }, + { 'F', "recompress", ap_no }, + { 'h', "help", ap_no }, + { 'k', "keep", ap_no }, + { 'm', "match-length", ap_yes }, + { 'o', "output", ap_yes }, + { 'q', "quiet", ap_no }, + { 's', "dictionary-size", ap_yes }, + { 'S', "volume-size", ap_yes }, + { 't', "test", ap_no }, + { 'v', "verbose", ap_no }, + { 'V', "version", ap_no }, + { 0 , 0, ap_no } }; + + struct Arg_parser parser; + + invocation_name = argv[0]; + CRC32_init(); + + if( !ap_init( &parser, argc, argv, options, 0 ) ) + { show_error( "Memory exhausted.", 0, false ); return 1; } + if( ap_error( &parser ) ) /* bad option */ + { show_error( ap_error( &parser ), 0, true ); return 1; } + + for( ; argind < ap_arguments( &parser ); ++argind ) + { + const int code = ap_code( &parser, argind ); + const char * const arg = ap_argument( &parser, argind ); + if( !code ) break; /* no more options */ + switch( code ) + { + case '0': + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + encoder_options = option_mapping[code-'0']; break; + case 'b': break; + case 'c': to_stdout = true; break; + case 'd': program_mode = m_decompress; break; + case 'f': force = true; break; + case 'F': recompress = true; break; + case 'h': show_help(); return 0; + case 'k': keep_input_files = true; break; + case 'm': encoder_options.match_len_limit = + getnum( arg, min_match_len_limit, max_match_len ); break; + case 'o': default_output_filename = arg; break; + case 'q': verbosity = -1; break; + case 's': encoder_options.dictionary_size = get_dict_size( arg ); + break; + case 'S': break; + case 't': program_mode = m_test; break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + default : internal_error( "uncaught option" ); + } + } /* end process options */ + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( program_mode == m_test ) + outfd = -1; + + num_filenames = max( 1, ap_arguments( &parser ) - argind ); + filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] ); + filenames[0] = "-"; + + for( i = 0; argind + i < ap_arguments( &parser ); ++i ) + { + filenames[i] = ap_argument( &parser, argind + i ); + if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true; + } + + if( !to_stdout && program_mode != m_test && + ( filenames_given || default_output_filename[0] ) ) + set_signals(); + + Pp_init( &pp, filenames, num_filenames, verbosity ); + + output_filename = resize_buffer( output_filename, 1 ); + for( i = 0; i < num_filenames; ++i ) + { + int tmp; + struct stat in_stats; + const struct stat * in_statsp; + output_filename[0] = 0; + + if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 ) + { + input_filename = ""; + infd = STDIN_FILENO; + if( program_mode != m_test ) + { + if( to_stdout || !default_output_filename[0] ) + outfd = STDOUT_FILENO; + else + { + if( program_mode == m_compress ) + set_c_outname( default_output_filename ); + else + { + output_filename = resize_buffer( output_filename, + strlen( default_output_filename ) + 1 ); + strcpy( output_filename, default_output_filename ); + } + outfd_mode = all_rw; + if( !open_outstream( force ) ) + { + if( retval < 1 ) retval = 1; + close( infd ); infd = -1; + continue; + } + } + } + } + else + { + const int eindex = extension_index( filenames[i] ); + input_filename = filenames[i]; + infd = open_instream( input_filename, &in_stats, program_mode, + eindex, recompress, to_stdout ); + if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } + if( program_mode != m_test ) + { + if( to_stdout ) outfd = STDOUT_FILENO; + else + { + if( program_mode == m_compress ) + set_c_outname( input_filename ); + else set_d_outname( input_filename, eindex ); + outfd_mode = usr_rw; + if( !open_outstream( force ) ) + { + if( retval < 1 ) retval = 1; + close( infd ); infd = -1; + continue; + } + } + } + } + + if( !check_tty( infd, program_mode ) ) return 1; + + if( output_filename[0] && !to_stdout && program_mode != m_test ) + delete_output_on_interrupt = true; + in_statsp = input_filename[0] ? &in_stats : 0; + Pp_set_name( &pp, input_filename ); + if( program_mode == m_compress ) + tmp = compress( &encoder_options, infd, &pp ); + else + tmp = decompress( infd, &pp, program_mode == m_test ); + if( tmp > retval ) retval = tmp; + if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); + + if( delete_output_on_interrupt ) + close_and_set_permissions( in_statsp ); + if( input_filename[0] ) + { + close( infd ); infd = -1; + if( !keep_input_files && !to_stdout && program_mode != m_test ) + remove( input_filename ); + } + } + if( outfd >= 0 && close( outfd ) != 0 ) + { + show_error( "Can't close stdout", errno, false ); + if( retval < 1 ) retval = 1; + } + free( output_filename ); + free( filenames ); + ap_free( &parser ); + return retval; + } diff --git a/pdarg_parser.c b/pdarg_parser.c deleted file mode 100644 index 71dc5d0..0000000 --- a/pdarg_parser.c +++ /dev/null @@ -1,277 +0,0 @@ -/* Pdlzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. - - This program is free software: you have unlimited permission - to copy, distribute and modify it. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ - -#include -#include - -#include "pdarg_parser.h" - - -/* assure at least a minimum size for buffer 'buf' */ -static void * ap_resize_buffer( void * buf, const int min_size ) - { - if( buf ) buf = realloc( buf, min_size ); - else buf = malloc( min_size ); - return buf; - } - - -static char push_back_record( struct Arg_parser * const ap, - const int code, const char * const argument ) - { - const int len = strlen( argument ); - struct ap_Record *p; - void * tmp = ap_resize_buffer( ap->data, - ( ap->data_size + 1 ) * sizeof (struct ap_Record) ); - if( !tmp ) return 0; - ap->data = (struct ap_Record *)tmp; - p = &(ap->data[ap->data_size]); - p->code = code; - p->argument = 0; - tmp = ap_resize_buffer( p->argument, len + 1 ); - if( !tmp ) return 0; - p->argument = (char *)tmp; - strncpy( p->argument, argument, len + 1 ); - ++ap->data_size; - return 1; - } - - -static char add_error( struct Arg_parser * const ap, const char * const msg ) - { - const int len = strlen( msg ); - void * tmp = ap_resize_buffer( ap->error, ap->error_size + len + 1 ); - if( !tmp ) return 0; - ap->error = (char *)tmp; - strncpy( ap->error + ap->error_size, msg, len + 1 ); - ap->error_size += len; - return 1; - } - - -static void free_data( struct Arg_parser * const ap ) - { - int i; - for( i = 0; i < ap->data_size; ++i ) free( ap->data[i].argument ); - if( ap->data ) { free( ap->data ); ap->data = 0; } - ap->data_size = 0; - } - - -static char parse_long_option( struct Arg_parser * const ap, - const char * const opt, const char * const arg, - const struct ap_Option options[], - int * const argindp ) - { - unsigned int len; - int index = -1; - int i; - char exact = 0, ambig = 0; - - for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; - - /* Test all long options for either exact match or abbreviated matches. */ - for( i = 0; options[i].code != 0; ++i ) - if( options[i].name && !strncmp( options[i].name, &opt[2], len ) ) - { - if( strlen( options[i].name ) == len ) /* Exact match found */ - { index = i; exact = 1; break; } - else if( index < 0 ) index = i; /* First nonexact match found */ - else if( options[index].code != options[i].code || - options[index].has_arg != options[i].has_arg ) - ambig = 1; /* Second or later nonexact match found */ - } - - if( ambig && !exact ) - { - add_error( ap, "option '" ); add_error( ap, opt ); - add_error( ap, "' is ambiguous" ); - return 1; - } - - if( index < 0 ) /* nothing found */ - { - add_error( ap, "unrecognized option '" ); add_error( ap, opt ); - add_error( ap, "'" ); - return 1; - } - - ++*argindp; - - if( opt[len+2] ) /* '--=' syntax */ - { - if( options[index].has_arg == ap_no ) - { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); - add_error( ap, "' doesn't allow an argument" ); - return 1; - } - if( options[index].has_arg == ap_yes && !opt[len+3] ) - { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); - add_error( ap, "' requires an argument" ); - return 1; - } - return push_back_record( ap, options[index].code, &opt[len+3] ); - } - - if( options[index].has_arg == ap_yes ) - { - if( !arg || !arg[0] ) - { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); - add_error( ap, "' requires an argument" ); - return 1; - } - ++*argindp; - return push_back_record( ap, options[index].code, arg ); - } - - return push_back_record( ap, options[index].code, "" ); - } - - -static char parse_short_option( struct Arg_parser * const ap, - const char * const opt, const char * const arg, - const struct ap_Option options[], - int * const argindp ) - { - int cind = 1; /* character index in opt */ - - while( cind > 0 ) - { - int index = -1; - int i; - const unsigned char code = opt[cind]; - char code_str[2]; - code_str[0] = code; code_str[1] = 0; - - if( code != 0 ) - for( i = 0; options[i].code; ++i ) - if( code == options[i].code ) - { index = i; break; } - - if( index < 0 ) - { - add_error( ap, "invalid option -- " ); add_error( ap, code_str ); - return 1; - } - - if( opt[++cind] == 0 ) { ++*argindp; cind = 0; } /* opt finished */ - - if( options[index].has_arg != ap_no && cind > 0 && opt[cind] ) - { - if( !push_back_record( ap, code, &opt[cind] ) ) return 0; - ++*argindp; cind = 0; - } - else if( options[index].has_arg == ap_yes ) - { - if( !arg || !arg[0] ) - { - add_error( ap, "option requires an argument -- " ); - add_error( ap, code_str ); - return 1; - } - ++*argindp; cind = 0; - if( !push_back_record( ap, code, arg ) ) return 0; - } - else if( !push_back_record( ap, code, "" ) ) return 0; - } - return 1; - } - - -char ap_init( struct Arg_parser * const ap, - const int argc, const char * const argv[], - const struct ap_Option options[], const char in_order ) - { - const char ** non_options = 0; /* skipped non-options */ - int non_options_size = 0; /* number of skipped non-options */ - int argind = 1; /* index in argv */ - int i; - - ap->data = 0; - ap->error = 0; - ap->data_size = 0; - ap->error_size = 0; - if( argc < 2 || !argv || !options ) return 1; - - while( argind < argc ) - { - const unsigned char ch1 = argv[argind][0]; - const unsigned char ch2 = ( ch1 ? argv[argind][1] : 0 ); - - if( ch1 == '-' && ch2 ) /* we found an option */ - { - const char * const opt = argv[argind]; - const char * const arg = (argind + 1 < argc) ? argv[argind+1] : 0; - if( ch2 == '-' ) - { - if( !argv[argind][2] ) { ++argind; break; } /* we found "--" */ - else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0; - } - else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0; - if( ap->error ) break; - } - else - { - if( !in_order ) - { - void * tmp = ap_resize_buffer( non_options, - ( non_options_size + 1 ) * sizeof *non_options ); - if( !tmp ) return 0; - non_options = (const char **)tmp; - non_options[non_options_size++] = argv[argind++]; - } - else if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; - } - } - if( ap->error ) free_data( ap ); - else - { - for( i = 0; i < non_options_size; ++i ) - if( !push_back_record( ap, 0, non_options[i] ) ) return 0; - while( argind < argc ) - if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; - } - if( non_options ) free( non_options ); - return 1; - } - - -void ap_free( struct Arg_parser * const ap ) - { - free_data( ap ); - if( ap->error ) { free( ap->error ); ap->error = 0; } - ap->error_size = 0; - } - - -const char * ap_error( const struct Arg_parser * const ap ) - { return ap->error; } - - -int ap_arguments( const struct Arg_parser * const ap ) - { return ap->data_size; } - - -int ap_code( const struct Arg_parser * const ap, const int i ) - { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].code; - else return 0; - } - - -const char * ap_argument( const struct Arg_parser * const ap, const int i ) - { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].argument; - else return ""; - } diff --git a/pdarg_parser.h b/pdarg_parser.h deleted file mode 100644 index 020c57c..0000000 --- a/pdarg_parser.h +++ /dev/null @@ -1,85 +0,0 @@ -/* Pdlzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. - - This program is free software: you have unlimited permission - to copy, distribute and modify it. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ - -/* Arg_parser reads the arguments in 'argv' and creates a number of - option codes, option arguments and non-option arguments. - - In case of error, 'ap_error' returns a non-null pointer to an error - message. - - 'options' is an array of 'struct ap_Option' terminated by an element - containing a code which is zero. A null name means a short-only - option. A code value outside the unsigned char range means a - long-only option. - - Arg_parser normally makes it appear as if all the option arguments - were specified before all the non-option arguments for the purposes - of parsing, even if the user of your program intermixed option and - non-option arguments. If you want the arguments in the exact order - the user typed them, call 'ap_init' with 'in_order' = true. - - The argument '--' terminates all options; any following arguments are - treated as non-option arguments, even if they begin with a hyphen. - - The syntax for optional option arguments is '-' - (without whitespace), or '--='. -*/ - -#ifdef __cplusplus -extern "C" { -#endif - -enum ap_Has_arg { ap_no, ap_yes, ap_maybe }; - -struct ap_Option - { - int code; /* Short option letter or code ( code != 0 ) */ - const char * name; /* Long option name (maybe null) */ - enum ap_Has_arg has_arg; - }; - - -struct ap_Record - { - int code; - char * argument; - }; - - -struct Arg_parser - { - struct ap_Record * data; - char * error; - int data_size; - int error_size; - }; - - -char ap_init( struct Arg_parser * const ap, - const int argc, const char * const argv[], - const struct ap_Option options[], const char in_order ); - -void ap_free( struct Arg_parser * const ap ); - -const char * ap_error( const struct Arg_parser * const ap ); - - /* The number of arguments parsed (may be different from argc) */ -int ap_arguments( const struct Arg_parser * const ap ); - - /* If ap_code( i ) is 0, ap_argument( i ) is a non-option. - Else ap_argument( i ) is the option's argument (or empty). */ -int ap_code( const struct Arg_parser * const ap, const int i ); - -const char * ap_argument( const struct Arg_parser * const ap, const int i ); - -#ifdef __cplusplus -} -#endif diff --git a/pdlzip.h b/pdlzip.h deleted file mode 100644 index 6229d7c..0000000 --- a/pdlzip.h +++ /dev/null @@ -1,190 +0,0 @@ -/* Pdlzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. - - This program is free software: you have unlimited permission - to copy, distribute and modify it. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ - -#ifndef __cplusplus -enum Bool { false = 0, true = 1 }; -typedef enum Bool bool; -#endif - -#ifndef max - #define max(x,y) ((x) >= (y) ? (x) : (y)) -#endif -#ifndef min - #define min(x,y) ((x) <= (y) ? (x) : (y)) -#endif - -enum { - min_dictionary_bits = 12, - min_dictionary_size = 1 << min_dictionary_bits, - max_dictionary_bits = 26, - max_dictionary_size = 1 << max_dictionary_bits, - literal_context_bits = 3, - pos_state_bits = 2, - - len_low_bits = 3, - len_mid_bits = 3, - len_high_bits = 8, - len_low_symbols = 1 << len_low_bits, - len_mid_symbols = 1 << len_mid_bits, - len_high_symbols = 1 << len_high_bits, - max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, - - min_match_len = 2, /* must be 2 */ - max_match_len = min_match_len + max_len_symbols - 1, /* 273 */ - min_match_len_limit = 5 }; - -typedef uint32_t CRC32[256]; /* Table of CRCs of all 8-bit messages. */ - -extern CRC32 crc32; - -static inline void CRC32_init() - { - unsigned int n; - for( n = 0; n < 256; ++n ) - { - unsigned int c = n; - int k; - for( k = 0; k < 8; ++k ) - { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } - crc32[n] = c; - } - } - -static inline void CRC32_update_byte( uint32_t * crc, const uint8_t byte ) - { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } -static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffer, - const int size ) - { - int i; - for( i = 0; i < size; ++i ) - *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); - } - - -static inline int real_bits( const int value ) - { - int bits = 0, i, mask; - for( i = 1, mask = 1; mask > 0; ++i, mask <<= 1 ) - if( value & mask ) bits = i; - return bits; - } - - -static const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; - -typedef uint8_t File_header[6]; /* 0-3 magic bytes */ - /* 4 version */ - /* 5 coded_dict_size */ -enum { Fh_size = 6 }; - -static inline void Fh_set_magic( File_header data ) - { - memcpy( data, magic_string, 4 ); - data[4] = 1; - } - -static inline bool Fh_verify_magic( const File_header data ) - { - return ( memcmp( data, magic_string, 4 ) == 0 ); - } - -static inline uint8_t Fh_version( const File_header data ) - { return data[4]; } - -static inline bool Fh_verify_version( const File_header data ) - { return ( data[4] <= 1 ); } - -static inline int Fh_get_dictionary_size( const File_header data ) - { - int sz = ( 1 << ( data[5] & 0x1F ) ); - if( sz > min_dictionary_size && sz <= max_dictionary_size ) - sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 ); - return sz; - } - -static inline bool Fh_set_dictionary_size( File_header data, const int sz ) - { - if( sz >= min_dictionary_size && sz <= max_dictionary_size ) - { - data[5] = real_bits( sz - 1 ); - if( sz > min_dictionary_size ) - { - const int base_size = 1 << data[5]; - const int wedge = base_size / 16; - int i; - for( i = 7; i >= 1; --i ) - if( base_size - ( i * wedge ) >= sz ) - { data[5] |= ( i << 5 ); break; } - } - return true; - } - return false; - } - - -typedef uint8_t File_trailer[20]; - /* 0-3 CRC32 of the uncompressed data */ - /* 4-11 size of the uncompressed data */ - /* 12-19 member size including header and trailer */ - -enum { Ft_size = 20 }; - -static inline int Ft_versioned_size( const int version ) - { return ( ( version >= 1 ) ? 20 : 12 ); } - -static inline uint32_t Ft_get_data_crc( const File_trailer data ) - { - uint32_t tmp = 0; - int i; - for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_data_crc( File_trailer data, uint32_t crc ) - { - int i; - for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } - } - -static inline long long Ft_get_data_size( const File_trailer data ) - { - long long tmp = 0; - int i; - for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_data_size( File_trailer data, long long sz ) - { - int i; - for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } - -static inline long long Ft_get_member_size( const File_trailer data ) - { - long long tmp = 0; - int i; - for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_member_size( File_trailer data, long long sz ) - { - int i; - for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } - - -/* defined in main.c */ -extern int verbosity; - -void show_error( const char * const msg, const int errcode, const bool help ); -void internal_error( const char * const msg ); diff --git a/testsuite/check.sh b/testsuite/check.sh index ea7a6cc..bd7bf3a 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Pdlzip - Data compressor based on the LZMA algorithm -# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. +# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -26,18 +26,32 @@ fail=0 printf "testing pdlzip-%s..." "$2" -"${LZIP}" -t "${testdir}"/test.lz || fail=1 -printf . -"${LZIP}" -cd "${testdir}"/test.lz > copy || fail=1 +"${LZIP}" -t "${testdir}"/test.txt.lz || fail=1 +"${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1 cmp in copy || fail=1 printf . -"${LZIP}" -t "${testdir}"/test.lzma || fail=1 +"${LZIP}" -cfq "${testdir}"/test.txt.lz > out +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cF "${testdir}"/test.txt.lz > out || fail=1 +"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1 +cmp in copy || fail=1 printf . -"${LZIP}" -cd "${testdir}"/test.lzma > copy || fail=1 + +"${LZIP}" -t "${testdir}"/test.txt.lzma || fail=1 +"${LZIP}" -cd "${testdir}"/test.txt.lzma > copy || fail=1 cmp in copy || fail=1 printf . +"${LZIP}" -cqs-1 in > out +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs0 in > out +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs4095 in > out +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqm274 in > out +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi + for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 @@ -62,11 +76,24 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do printf . done +for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do + "${LZIP}" -f -$i -o out < in || fail=1 + "${LZIP}" -df -o copy < out.lz || fail=1 + cmp in copy || fail=1 + printf . +done + "${LZIP}" < in > anyothername || fail=1 "${LZIP}" -d anyothername || fail=1 cmp in anyothername.out || fail=1 printf . +cat in in > in2 || framework_failure +"${LZIP}" < in2 > out2 || fail=1 +"${LZIP}" -d < out2 > copy2 || fail=1 +cmp in2 copy2 || fail=1 +printf . + echo if [ ${fail} = 0 ] ; then echo "tests completed successfully." diff --git a/testsuite/test.lz b/testsuite/test.lz deleted file mode 100644 index a09b1e8..0000000 Binary files a/testsuite/test.lz and /dev/null differ diff --git a/testsuite/test.lzma b/testsuite/test.lzma deleted file mode 100644 index 5f62956..0000000 Binary files a/testsuite/test.lzma and /dev/null differ diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz new file mode 100644 index 0000000..4db881a Binary files /dev/null and b/testsuite/test.txt.lz differ diff --git a/testsuite/test.txt.lzma b/testsuite/test.txt.lzma new file mode 100644 index 0000000..76f2605 Binary files /dev/null and b/testsuite/test.txt.lzma differ -- cgit v1.2.3