From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- other-licenses/7zstub/src/DOC/7zC.txt | 187 ++++ other-licenses/7zstub/src/DOC/7zFormat.txt | 469 ++++++++ other-licenses/7zstub/src/DOC/Methods.txt | 167 +++ other-licenses/7zstub/src/DOC/installer.txt | 166 +++ other-licenses/7zstub/src/DOC/lzma-history.txt | 424 +++++++ other-licenses/7zstub/src/DOC/lzma-sdk.txt | 357 ++++++ .../7zstub/src/DOC/lzma-specification.txt | 1176 ++++++++++++++++++++ other-licenses/7zstub/src/DOC/lzma.txt | 328 ++++++ 8 files changed, 3274 insertions(+) create mode 100644 other-licenses/7zstub/src/DOC/7zC.txt create mode 100644 other-licenses/7zstub/src/DOC/7zFormat.txt create mode 100644 other-licenses/7zstub/src/DOC/Methods.txt create mode 100644 other-licenses/7zstub/src/DOC/installer.txt create mode 100644 other-licenses/7zstub/src/DOC/lzma-history.txt create mode 100644 other-licenses/7zstub/src/DOC/lzma-sdk.txt create mode 100644 other-licenses/7zstub/src/DOC/lzma-specification.txt create mode 100644 other-licenses/7zstub/src/DOC/lzma.txt (limited to 'other-licenses/7zstub/src/DOC') diff --git a/other-licenses/7zstub/src/DOC/7zC.txt b/other-licenses/7zstub/src/DOC/7zC.txt new file mode 100644 index 0000000000..49276787f8 --- /dev/null +++ b/other-licenses/7zstub/src/DOC/7zC.txt @@ -0,0 +1,187 @@ +7z ANSI-C Decoder 9.35 +---------------------- + +7z ANSI-C provides 7z/LZMA decoding. +7z ANSI-C version is simplified version ported from C++ code. + +LZMA is default and general compression method of 7z format +in 7-Zip compression program (www.7-zip.org). LZMA provides high +compression ratio and very fast decompression. + + +LICENSE +------- + +7z ANSI-C Decoder is part of the LZMA SDK. +LZMA SDK is written and placed in the public domain by Igor Pavlov. + +Files +--------------------- + +7zDecode.* - Low level 7z decoding +7zExtract.* - High level 7z decoding +7zHeader.* - .7z format constants +7zIn.* - .7z archive opening +7zItem.* - .7z structures +7zMain.c - Test application + + +How To Use +---------- + +You can create .7z archive with 7z.exe, 7za.exe or 7zr.exe: + + 7z.exe a archive.7z *.htm -r -mx -m0fb=255 + +If you have big number of files in archive, and you need fast extracting, +you can use partly-solid archives: + + 7za.exe a archive.7z *.htm -ms=512K -r -mx -m0fb=255 -m0d=512K + +In that example 7-Zip will use 512KB solid blocks. So it needs to decompress only +512KB for extracting one file from such archive. + + +Limitations of current version of 7z ANSI-C Decoder +--------------------------------------------------- + + - It reads only "FileName", "Size", "LastWriteTime" and "CRC" information for each file in archive. + - It supports only LZMA and Copy (no compression) methods with BCJ or BCJ2 filters. + - It converts original UTF-16 Unicode file names to UTF-8 Unicode file names. + +These limitations will be fixed in future versions. + + +Using 7z ANSI-C Decoder Test application: +----------------------------------------- + +Usage: 7zDec + +: + e: Extract files from archive + l: List contents of archive + t: Test integrity of archive + +Example: + + 7zDec l archive.7z + +lists contents of archive.7z + + 7zDec e archive.7z + +extracts files from archive.7z to current folder. + + +How to use .7z Decoder +---------------------- + +Memory allocation +~~~~~~~~~~~~~~~~~ + +7z Decoder uses two memory pools: +1) Temporary pool +2) Main pool +Such scheme can allow you to avoid fragmentation of allocated blocks. + + +Steps for using 7z decoder +-------------------------- + +Use code at 7zMain.c as example. + +1) Declare variables: + inStream /* implements ILookInStream interface */ + CSzArEx db; /* 7z archive database structure */ + ISzAlloc allocImp; /* memory functions for main pool */ + ISzAlloc allocTempImp; /* memory functions for temporary pool */ + +2) call CrcGenerateTable(); function to initialize CRC structures. + +3) call SzArEx_Init(&db); function to initialize db structures. + +4) call SzArEx_Open(&db, inStream, &allocMain, &allocTemp) to open archive + +This function opens archive "inStream" and reads headers to "db". +All items in "db" will be allocated with "allocMain" functions. +SzArEx_Open function allocates and frees temporary structures by "allocTemp" functions. + +5) List items or Extract items + + Listing code: + ~~~~~~~~~~~~~ + + Use SzArEx_GetFileNameUtf16 function. Look example code in C\Util\7z\7zMain.c file. + + + Extracting code: + ~~~~~~~~~~~~~~~~ + + SZ_RESULT SzAr_Extract( + CArchiveDatabaseEx *db, + ILookInStream *inStream, + UInt32 fileIndex, /* index of file */ + UInt32 *blockIndex, /* index of solid block */ + Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */ + size_t *outBufferSize, /* buffer size for output buffer */ + size_t *offset, /* offset of stream for required file in *outBuffer */ + size_t *outSizeProcessed, /* size of file in *outBuffer */ + ISzAlloc *allocMain, + ISzAlloc *allocTemp); + + If you need to decompress more than one file, you can send these values from previous call: + blockIndex, + outBuffer, + outBufferSize, + You can consider "outBuffer" as cache of solid block. If your archive is solid, + it will increase decompression speed. + + After decompressing you must free "outBuffer": + allocImp.Free(outBuffer); + +6) call SzArEx_Free(&db, allocImp.Free) to free allocated items in "db". + + + + +Memory requirements for .7z decoding +------------------------------------ + +Memory usage for Archive opening: + - Temporary pool: + - Memory for uncompressed .7z headers + - some other temporary blocks + - Main pool: + - Memory for database: + Estimated size of one file structures in solid archive: + - Size (4 or 8 Bytes) + - CRC32 (4 bytes) + - LastWriteTime (8 bytes) + - Some file information (4 bytes) + - File Name (variable length) + pointer + allocation structures + +Memory usage for archive Decompressing: + - Temporary pool: + - Memory for LZMA decompressing structures + - Main pool: + - Memory for decompressed solid block + - Memory for temprorary buffers, if BCJ2 fileter is used. Usually these + temprorary buffers can be about 15% of solid block size. + + +7z Decoder doesn't allocate memory for compressed blocks. +Instead of this, you must allocate buffer with desired +size before calling 7z Decoder. Use 7zMain.c as example. + + +Defines +------- + +_SZ_ALLOC_DEBUG - define it if you want to debug alloc/free operations to stderr. + + +--- + +http://www.7-zip.org +http://www.7-zip.org/sdk.html +http://www.7-zip.org/support.html diff --git a/other-licenses/7zstub/src/DOC/7zFormat.txt b/other-licenses/7zstub/src/DOC/7zFormat.txt new file mode 100644 index 0000000000..6b8678fbb4 --- /dev/null +++ b/other-licenses/7zstub/src/DOC/7zFormat.txt @@ -0,0 +1,469 @@ +7z Format description (4.59) +---------------------------- + +This file contains description of 7z archive format. +7z archive can contain files compressed with any method. +See "Methods.txt" for description for defined compressing methods. + + +Format structure Overview +------------------------- + +Some fields can be optional. + +Archive structure +~~~~~~~~~~~~~~~~~ +SignatureHeader +[PackedStreams] +[PackedStreamsForHeaders] +[ + Header + or + { + Packed Header + HeaderInfo + } +] + + + +Header structure +~~~~~~~~~~~~~~~~ +{ + ArchiveProperties + AdditionalStreams + { + PackInfo + { + PackPos + NumPackStreams + Sizes[NumPackStreams] + CRCs[NumPackStreams] + } + CodersInfo + { + NumFolders + Folders[NumFolders] + { + NumCoders + CodersInfo[NumCoders] + { + ID + NumInStreams; + NumOutStreams; + PropertiesSize + Properties[PropertiesSize] + } + NumBindPairs + BindPairsInfo[NumBindPairs] + { + InIndex; + OutIndex; + } + PackedIndices + } + UnPackSize[Folders][Folders.NumOutstreams] + CRCs[NumFolders] + } + SubStreamsInfo + { + NumUnPackStreamsInFolders[NumFolders]; + UnPackSizes[] + CRCs[] + } + } + MainStreamsInfo + { + (Same as in AdditionalStreams) + } + FilesInfo + { + NumFiles + Properties[] + { + ID + Size + Data + } + } +} + +HeaderInfo structure +~~~~~~~~~~~~~~~~~~~~ +{ + (Same as in AdditionalStreams) +} + + + +Notes about Notation and encoding +--------------------------------- + +7z uses little endian encoding. + +7z archive format has optional headers that are marked as +[] +Header +[] + +REAL_UINT64 means real UINT64. + +UINT64 means real UINT64 encoded with the following scheme: + + Size of encoding sequence depends from first byte: + First_Byte Extra_Bytes Value + (binary) + 0xxxxxxx : ( xxxxxxx ) + 10xxxxxx BYTE y[1] : ( xxxxxx << (8 * 1)) + y + 110xxxxx BYTE y[2] : ( xxxxx << (8 * 2)) + y + ... + 1111110x BYTE y[6] : ( x << (8 * 6)) + y + 11111110 BYTE y[7] : y + 11111111 BYTE y[8] : y + + + +Property IDs +------------ + +0x00 = kEnd + +0x01 = kHeader + +0x02 = kArchiveProperties + +0x03 = kAdditionalStreamsInfo +0x04 = kMainStreamsInfo +0x05 = kFilesInfo + +0x06 = kPackInfo +0x07 = kUnPackInfo +0x08 = kSubStreamsInfo + +0x09 = kSize +0x0A = kCRC + +0x0B = kFolder + +0x0C = kCodersUnPackSize +0x0D = kNumUnPackStream + +0x0E = kEmptyStream +0x0F = kEmptyFile +0x10 = kAnti + +0x11 = kName +0x12 = kCTime +0x13 = kATime +0x14 = kMTime +0x15 = kWinAttributes +0x16 = kComment + +0x17 = kEncodedHeader + +0x18 = kStartPos +0x19 = kDummy + + +7z format headers +----------------- + +SignatureHeader +~~~~~~~~~~~~~~~ + BYTE kSignature[6] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; + + ArchiveVersion + { + BYTE Major; // now = 0 + BYTE Minor; // now = 2 + }; + + UINT32 StartHeaderCRC; + + StartHeader + { + REAL_UINT64 NextHeaderOffset + REAL_UINT64 NextHeaderSize + UINT32 NextHeaderCRC + } + + +........................... + + +ArchiveProperties +~~~~~~~~~~~~~~~~~ +BYTE NID::kArchiveProperties (0x02) +for (;;) +{ + BYTE PropertyType; + if (aType == 0) + break; + UINT64 PropertySize; + BYTE PropertyData[PropertySize]; +} + + +Digests (NumStreams) +~~~~~~~~~~~~~~~~~~~~~ + BYTE AllAreDefined + if (AllAreDefined == 0) + { + for(NumStreams) + BIT Defined + } + UINT32 CRCs[NumDefined] + + +PackInfo +~~~~~~~~~~~~ + BYTE NID::kPackInfo (0x06) + UINT64 PackPos + UINT64 NumPackStreams + + [] + BYTE NID::kSize (0x09) + UINT64 PackSizes[NumPackStreams] + [] + + [] + BYTE NID::kCRC (0x0A) + PackStreamDigests[NumPackStreams] + [] + + BYTE NID::kEnd + + +Folder +~~~~~~ + UINT64 NumCoders; + for (NumCoders) + { + BYTE + { + 0:3 CodecIdSize + 4: Is Complex Coder + 5: There Are Attributes + 6: Reserved + 7: There are more alternative methods. (Not used anymore, must be 0). + } + BYTE CodecId[CodecIdSize] + if (Is Complex Coder) + { + UINT64 NumInStreams; + UINT64 NumOutStreams; + } + if (There Are Attributes) + { + UINT64 PropertiesSize + BYTE Properties[PropertiesSize] + } + } + + NumBindPairs = NumOutStreamsTotal - 1; + + for (NumBindPairs) + { + UINT64 InIndex; + UINT64 OutIndex; + } + + NumPackedStreams = NumInStreamsTotal - NumBindPairs; + if (NumPackedStreams > 1) + for(NumPackedStreams) + { + UINT64 Index; + }; + + + + +Coders Info +~~~~~~~~~~~ + + BYTE NID::kUnPackInfo (0x07) + + + BYTE NID::kFolder (0x0B) + UINT64 NumFolders + BYTE External + switch(External) + { + case 0: + Folders[NumFolders] + case 1: + UINT64 DataStreamIndex + } + + + BYTE ID::kCodersUnPackSize (0x0C) + for(Folders) + for(Folder.NumOutStreams) + UINT64 UnPackSize; + + + [] + BYTE NID::kCRC (0x0A) + UnPackDigests[NumFolders] + [] + + + + BYTE NID::kEnd + + + +SubStreams Info +~~~~~~~~~~~~~~ + BYTE NID::kSubStreamsInfo; (0x08) + + [] + BYTE NID::kNumUnPackStream; (0x0D) + UINT64 NumUnPackStreamsInFolders[NumFolders]; + [] + + + [] + BYTE NID::kSize (0x09) + UINT64 UnPackSizes[] + [] + + + [] + BYTE NID::kCRC (0x0A) + Digests[Number of streams with unknown CRC] + [] + + + BYTE NID::kEnd + + +Streams Info +~~~~~~~~~~~~ + + [] + PackInfo + [] + + + [] + CodersInfo + [] + + + [] + SubStreamsInfo + [] + + BYTE NID::kEnd + + +FilesInfo +~~~~~~~~~ + BYTE NID::kFilesInfo; (0x05) + UINT64 NumFiles + + for (;;) + { + BYTE PropertyType; + if (aType == 0) + break; + + UINT64 Size; + + switch(PropertyType) + { + kEmptyStream: (0x0E) + for(NumFiles) + BIT IsEmptyStream + + kEmptyFile: (0x0F) + for(EmptyStreams) + BIT IsEmptyFile + + kAnti: (0x10) + for(EmptyStreams) + BIT IsAntiFile + + case kCTime: (0x12) + case kATime: (0x13) + case kMTime: (0x14) + BYTE AllAreDefined + if (AllAreDefined == 0) + { + for(NumFiles) + BIT TimeDefined + } + BYTE External; + if(External != 0) + UINT64 DataIndex + [] + for(Definded Items) + UINT64 Time + [] + + kNames: (0x11) + BYTE External; + if(External != 0) + UINT64 DataIndex + [] + for(Files) + { + wchar_t Names[NameSize]; + wchar_t 0; + } + [] + + kAttributes: (0x15) + BYTE AllAreDefined + if (AllAreDefined == 0) + { + for(NumFiles) + BIT AttributesAreDefined + } + BYTE External; + if(External != 0) + UINT64 DataIndex + [] + for(Definded Attributes) + UINT32 Attributes + [] + } + } + + +Header +~~~~~~ + BYTE NID::kHeader (0x01) + + [] + ArchiveProperties + [] + + [] + BYTE NID::kAdditionalStreamsInfo; (0x03) + StreamsInfo + [] + + [] + BYTE NID::kMainStreamsInfo; (0x04) + StreamsInfo + [] + + [] + FilesInfo + [] + + BYTE NID::kEnd + + +HeaderInfo +~~~~~~~~~~ + [] + BYTE NID::kEncodedHeader; (0x17) + StreamsInfo for Encoded Header + [] + + +--- +End of document diff --git a/other-licenses/7zstub/src/DOC/Methods.txt b/other-licenses/7zstub/src/DOC/Methods.txt new file mode 100644 index 0000000000..11adcb0ba3 --- /dev/null +++ b/other-licenses/7zstub/src/DOC/Methods.txt @@ -0,0 +1,167 @@ +7-Zip method IDs for 7z and xz archives +--------------------------------------- + +Version: 17.01 +Date: 2017-05-27 + +Each compression or crypto method in 7z is associated with unique binary value (ID). +The length of ID in bytes is arbitrary but it can not exceed 63 bits (8 bytes). + +xz and 7z formats use same ID map. + +If you want to add some new ID, you have two ways: + 1) Write request for allocating IDs to 7-Zip developers. + 2) Generate 8-bytes ID: + + 3F ZZ ZZ ZZ ZZ ZZ MM MM + + 3F - Prefix for random IDs (1 byte) + ZZ ZZ ZZ ZZ ZZ - Developer ID (5 bytes). Use real random bytes. + + MM MM - Method ID (2 bytes) + + You can notify 7-Zip developers about your Developer ID / Method ID. + + Note: Use new ID, if old codec can not decode data encoded with new version. + + +List of defined IDs +------------------- + +00 - Copy + +03 - Delta +04 - BCJ (x86) +05 - PPC (big-endian) +06 - IA64 +07 - ARM (little-endian) +08 - ARMT (little-endian) +09 - SPARC + +21 - LZMA2 + +02.. - Common + 03 [Swap] + - 2 Swap2 + - 4 Swap4 + +03.. - 7z + 01 - + 01 - LZMA + + 03 - [Branch Codecs] + 01 - [x86 Codecs] + 03 - BCJ + 1B - BCJ2 (4 packed streams) + 02 - + 05 - PPC (big-endian) + 03 - + 01 - Alpha + 04 - + 01 - IA64 + 05 - + 01 - ARM (little-endian) + 06 - + 05 - M68 (big-endian) + 07 - + 01 - ARMT (little-endian) + 08 - + 05 - SPARC + + 04 - + 01 - PPMD + + 7F - + 01 - experimental method. + + +04.. - Misc codecs + + 00 - Reserved + + 01 - [Zip] + 00 - Copy (not used. Use {00} instead) + 01 - Shrink + 06 - Implode + 08 - Deflate + 09 - Deflate64 + 0A - Imploding + 0C - BZip2 (not used. Use {040202} instead) + 0E - LZMA (LZMA-zip) + 5F - xz + 60 - Jpeg + 61 - WavPack + 62 - PPMd (PPMd-zip) + 63 - wzAES + + 02 - + 02 - BZip2 + + 03 - [Rar] + 01 - Rar1 + 02 - Rar2 + 03 - Rar3 + 05 - Rar5 + + 04 - [Arj] + 01 - Arj(1,2,3) + 02 - Arj4 + + 05 - [Z] + + 06 - [Lzh] + + 07 - Reserved for 7z + + 08 - [Cab] + + 09 - [NSIS] + 01 - DeflateNSIS + 02 - BZip2NSIS + + F7 - External codecs (that are not included to 7-Zip) + + 0x xx - reserved + + 10 xx - reserved (LZHAM) + 01 - LZHAM + + 11 xx - reserved (Tino Reichardt) + 01 - ZSTD + 02 - BROTLI + 04 - LZ4 + 05 - LZ5 + 06 - LIZARD + + +06.. - Crypto + + F0 - Ciphers without hashing algo + + 01 - [AES] + 0x - AES-128 + 4x - AES-192 + 8x - AES-256 + Cx - AES + + x0 - ECB + x1 - CBC + x2 - CFB + x3 - OFB + x4 - CTR + + F1 - Combine Ciphers + + 01 - [Zip] + 01 - ZipCrypto (Main Zip crypto algo) + + 03 - [RAR] + 02 - + 03 - Rar29AES (AES-128 + modified SHA-1) + + 07 - [7z] + 01 - 7zAES (AES-256 + SHA-256) + + +--- +End of document diff --git a/other-licenses/7zstub/src/DOC/installer.txt b/other-licenses/7zstub/src/DOC/installer.txt new file mode 100644 index 0000000000..70ad7dc6a6 --- /dev/null +++ b/other-licenses/7zstub/src/DOC/installer.txt @@ -0,0 +1,166 @@ +7-Zip for installers 9.38 +------------------------- + +7-Zip is a file archiver for Windows NT/2000/2003/2008/XP/Vista/7/8/10. + +7-Zip for installers is part of LZMA SDK. +LZMA SDK is written and placed in the public domain by Igor Pavlov. + +It's allowed to join 7-Zip SFX module with another software. +It's allowed to change resources of 7-Zip's SFX modules. + + +HOW to use +----------- + +7zr.exe is reduced version of 7za.exe of 7-Zip. +7zr.exe supports only format with these codecs: LZMA, LZMA2, BCJ, BCJ2, ARM, Copy. + +Example of compressing command for installation packages: + +7zr a archive.7z files + +7zSD.sfx is SFX module for installers. 7zSD.sfx uses msvcrt.dll. + +SFX modules for installers allow to create installation program. +Such module extracts archive to temp folder and then runs specified program and removes +temp files after program finishing. Self-extract archive for installers must be created +as joining 3 files: SFX_Module, Installer_Config, 7z_Archive. +Installer_Config is optional file. You can use the following command to create installer +self-extract archive: + +copy /b 7zSD.sfx + config.txt + archive.7z archive.exe + +The smallest installation package size can be achieved, if installation files was +uncompressed before including to 7z archive. + +-y switch for installer module (at runtime) specifies quiet mode for extracting. + +Installer Config file format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Config file contains commands for Installer. File begins from string +;!@Install@!UTF-8! and ends with ;!@InstallEnd@!. File must be written +in UTF-8 encoding. File contains string pairs: + +ID_String="Value" + +ID_String Description + +Title Title for messages +BeginPrompt Begin Prompt message +Progress Value can be "yes" or "no". Default value is "yes". +RunProgram Command for executing. Default value is "setup.exe". + Substring %%T will be replaced with path to temporary + folder, where files were extracted +Directory Directory prefix for "RunProgram". Default value is ".\\" +ExecuteFile Name of file for executing +ExecuteParameters Parameters for "ExecuteFile" + + +You can omit any string pair. + +There are two ways to run program: RunProgram and ExecuteFile. +Use RunProgram, if you want to run some program from .7z archive. +Use ExecuteFile, if you want to open some document from .7z archive or +if you want to execute some command from Windows. + +If you use RunProgram and if you specify empty directory prefix: Directory="", +the system searches for the executable file in the following sequence: + +1. The directory from which the application (installer) loaded. +2. The temporary folder, where files were extracted. +3. The Windows system directory. + + +Config file Examples +~~~~~~~~~~~~~~~~~~~~ + +;!@Install@!UTF-8! +Title="7-Zip 4.00" +BeginPrompt="Do you want to install the 7-Zip 4.00?" +RunProgram="setup.exe" +;!@InstallEnd@! + + + +;!@Install@!UTF-8! +Title="7-Zip 4.00" +BeginPrompt="Do you want to install the 7-Zip 4.00?" +ExecuteFile="7zip.msi" +;!@InstallEnd@! + + + +;!@Install@!UTF-8! +Title="7-Zip 4.01 Update" +BeginPrompt="Do you want to install the 7-Zip 4.01 Update?" +ExecuteFile="msiexec.exe" +ExecuteParameters="/i 7zip.msi REINSTALL=ALL REINSTALLMODE=vomus" +;!@InstallEnd@! + + + +Small SFX modules for installers +-------------------------------- + +7zS2.sfx - small SFX module (GUI version) +7zS2con.sfx - small SFX module (Console version) + +Small SFX modules support this codecs: LZMA, LZMA2, BCJ, BCJ2, ARM, COPY + +Small SFX module is similar to common SFX module for installers. +The difference (what's new in small version): + - Smaller size (30 KB vs 100 KB) + - C source code instead of Ñ++ + - No installer Configuration file + - No extracting progress window + - It decompresses solid 7z blocks (it can be whole 7z archive) to RAM. + So user that calls SFX installer must have free RAM of size of largest + solid 7z block (size of 7z archive at simplest case). + +How to use +---------- + +copy /b 7zS2.sfx + archive.7z sfx.exe + +When you run installer sfx module (sfx.exe) +1) It creates "7zNNNNNNNN" temp folder in system temp folder. +2) It extracts .7z archive to that folder +3) It executes one file from "7zNNNNNNNN" temp folder. +4) It removes "7zNNNNNNNN" temp folder + +You can send parameters to installer, and installer will transfer them to extracted .exe file. + +Small SFX uses 3 levels of priorities to select file to execute: + + 1) Files in root folder have higher priority than files in subfolders. + 2) File extension priorities (from high to low priority order): + bat, cmd, exe, inf, msi, cab (under Windows CE), html, htm + 3) File name priorities (from high to low priority order): + setup, install, run, start + +Windows CE (ARM) version of 7zS2.sfx is included to 7-Zip for Windows Mobile package. + + +Examples +-------- + +1) To create compressed console 7-Zip: + +7zr a c.7z 7z.exe 7z.dll -mx +copy /b 7zS2con.sfx + c.7z 7zCompr.exe +7zCompr.exe b -md22 + + +2) To create compressed GUI 7-Zip: + +7zr a g.7z 7zg.exe 7z.dll -mx +copy /b 7zS2.sfx + g.7z 7zgCompr.exe +7zgCompr.exe b -md22 + + +3) To open some file: + +7zr a h.7z readme.txt -mx +copy /b 7zS2.sfx + h.7z 7zTxt.exe +7zTxt.exe diff --git a/other-licenses/7zstub/src/DOC/lzma-history.txt b/other-licenses/7zstub/src/DOC/lzma-history.txt new file mode 100644 index 0000000000..c53e3bd702 --- /dev/null +++ b/other-licenses/7zstub/src/DOC/lzma-history.txt @@ -0,0 +1,424 @@ +HISTORY of the LZMA SDK +----------------------- + +18.05 2018-04-30 +------------------------- +- The speed for LZMA/LZMA2 compressing was increased + by 8% for fastest/fast compression levels and + by 3% for normal/maximum compression levels. +- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in + Windows 10 because of some BUG with "Large Pages" in Windows 10. + Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299). +- The BUG was fixed in Lzma2Enc.c + Lzma2Enc_Encode2() function worked incorretly, + if (inStream == NULL) and the number of block threads is more than 1. + + +18.03 beta 2018-03-04 +------------------------- +- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm + for x64 with about 30% higher speed than main version of LZMA decoder written in C. +- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%. +- 7-Zip now can use multi-threading for 7z/LZMA2 decoding, + if there are multiple independent data chunks in LZMA2 stream. +- 7-Zip now can use multi-threading for xz decoding, + if there are multiple blocks in xz stream. + + +18.01 2019-01-28 +------------------------- +- The BUG in 17.01 - 18.00 beta was fixed: + XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished() + didn't work correctly for xz archives without checksum (CRC). + + +18.00 beta 2019-01-10 +------------------------- +- The BUG in xz encoder was fixed: + There was memory leak of 16 KB for each file compressed with + xz compression method, if additional filter was used. + + +17.01 beta 2017-08-28 +------------------------- +- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression. + 7-Zip now uses additional memory buffers for multi-block LZMA2 compression. + CPU utilization was slightly improved. +- 7-zip now creates multi-block xz archives by default. Block size can be + specified with -ms[Size]{m|g} switch. +- xz decoder now can unpack random block from multi-block xz archives. +- 7-Zip command line: @listfile now doesn't work after -- switch. + Use -i@listfile before -- switch instead. +- The BUGs were fixed: + 7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive. + + +17.00 beta 2017-04-29 +------------------------- +- NewHandler.h / NewHandler.cpp: + now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900). +- C/7zTypes.h : the names of variables in interface structures were changed (vt). +- Some bugs were fixed. 7-Zip could crash in some cases. +- Some internal changes in code. + + +16.04 2016-10-04 +------------------------- +- The bug was fixed in DllSecur.c. + + +16.03 2016-09-28 +------------------------- +- SFX modules now use some protection against DLL preloading attack. +- Some bugs in 7z code were fixed. + + +16.02 2016-05-21 +------------------------- +- The BUG in 16.00 - 16.01 was fixed: + Split Handler (SplitHandler.cpp) returned incorrect + total size value (kpidSize) for split archives. + + +16.01 2016-05-19 +------------------------- +- Some internal changes to reduce the number of compiler warnings. + + +16.00 2016-05-10 +------------------------- +- Some bugs were fixed. + + +15.12 2015-11-19 +------------------------- +- The BUG in C version of 7z decoder was fixed: + 7zDec.c : SzDecodeLzma2() + 7z decoder could mistakenly report about decoding error for some 7z archives + that use LZMA2 compression method. + The probability to get that mistaken decoding error report was about + one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size). +- The BUG (in 9.26-15.11) in C version of 7z decoder was fixed: + 7zArcIn.c : SzReadHeader2() + 7z decoder worked incorrectly for 7z archives that contain + empty solid blocks, that can be placed to 7z archive, if some file is + unavailable for reading during archive creation. + + +15.09 beta 2015-10-16 +------------------------- +- The BUG in LZMA / LZMA2 encoding code was fixed. + The BUG in LzFind.c::MatchFinder_ReadBlock() function. + If input data size is larger than (4 GiB - dictionary_size), + the following code worked incorrectly: + - LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions + for compressing from memory to memory. + That BUG is not related to LZMA encoder version that works via streams. + - LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if + default value of chunk size (CLzma2EncProps::blockSize) is changed + to value larger than (4 GiB - dictionary_size). + + +9.38 beta 2015-01-03 +------------------------- +- The BUG in 9.31-9.37 was fixed: + IArchiveGetRawProps interface was disabled for 7z archives. +- The BUG in 9.26-9.36 was fixed: + Some code in CPP\7zip\Archive\7z\ worked correctly only under Windows. + + +9.36 beta 2014-12-26 +------------------------- +- The BUG in command line version was fixed: + 7-Zip created temporary archive in current folder during update archive + operation, if -w{Path} switch was not specified. + The fixed 7-Zip creates temporary archive in folder that contains updated archive. +- The BUG in 9.33-9.35 was fixed: + 7-Zip silently ignored file reading errors during 7z or gz archive creation, + and the created archive contained only part of file that was read before error. + The fixed 7-Zip stops archive creation and it reports about error. + + +9.35 beta 2014-12-07 +------------------------- +- 7zr.exe now support AES encryption. +- SFX mudules were added to LZMA SDK +- Some bugs were fixed. + + +9.21 beta 2011-04-11 +------------------------- +- New class FString for file names at file systems. +- Speed optimization in CRC code for big-endian CPUs. +- The BUG in Lzma2Dec.c was fixed: + Lzma2Decode function didn't work. + + +9.18 beta 2010-11-02 +------------------------- +- New small SFX module for installers (SfxSetup). + + +9.12 beta 2010-03-24 +------------------------- +- The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work, + if more than 10 threads were used (or more than 20 threads in some modes). + + +9.11 beta 2010-03-15 +------------------------- +- PPMd compression method support + + +9.09 2009-12-12 +------------------------- +- The bug was fixed: + Utf16_To_Utf8 funstions in UTFConvert.cpp and 7zMain.c + incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8. +- Some bugs were fixed + + +9.06 2009-08-17 +------------------------- +- Some changes in ANSI-C 7z Decoder interfaces. + + +9.04 2009-05-30 +------------------------- +- LZMA2 compression method support +- xz format support + + +4.65 2009-02-03 +------------------------- +- Some minor fixes + + +4.63 2008-12-31 +------------------------- +- Some minor fixes + + +4.61 beta 2008-11-23 +------------------------- +- The bug in ANSI-C LZMA Decoder was fixed: + If encoded stream was corrupted, decoder could access memory + outside of allocated range. +- Some changes in ANSI-C 7z Decoder interfaces. +- LZMA SDK is placed in the public domain. + + +4.60 beta 2008-08-19 +------------------------- +- Some minor fixes. + + +4.59 beta 2008-08-13 +------------------------- +- The bug was fixed: + LZMA Encoder in fast compression mode could access memory outside of + allocated range in some rare cases. + + +4.58 beta 2008-05-05 +------------------------- +- ANSI-C LZMA Decoder was rewritten for speed optimizations. +- ANSI-C LZMA Encoder was included to LZMA SDK. +- C++ LZMA code now is just wrapper over ANSI-C code. + + +4.57 2007-12-12 +------------------------- +- Speed optimizations in Ñ++ LZMA Decoder. +- Small changes for more compatibility with some C/C++ compilers. + + +4.49 beta 2007-07-05 +------------------------- +- .7z ANSI-C Decoder: + - now it supports BCJ and BCJ2 filters + - now it supports files larger than 4 GB. + - now it supports "Last Write Time" field for files. +- C++ code for .7z archives compressing/decompressing from 7-zip + was included to LZMA SDK. + + +4.43 2006-06-04 +------------------------- +- Small changes for more compatibility with some C/C++ compilers. + + +4.42 2006-05-15 +------------------------- +- Small changes in .h files in ANSI-C version. + + +4.39 beta 2006-04-14 +------------------------- +- The bug in versions 4.33b:4.38b was fixed: + C++ version of LZMA encoder could not correctly compress + files larger than 2 GB with HC4 match finder (-mfhc4). + + +4.37 beta 2005-04-06 +------------------------- +- Fixes in C++ code: code could no be compiled if _NO_EXCEPTIONS was defined. + + +4.35 beta 2005-03-02 +------------------------- +- The bug was fixed in C++ version of LZMA Decoder: + If encoded stream was corrupted, decoder could access memory + outside of allocated range. + + +4.34 beta 2006-02-27 +------------------------- +- Compressing speed and memory requirements for compressing were increased +- LZMA now can use only these match finders: HC4, BT2, BT3, BT4 + + +4.32 2005-12-09 +------------------------- +- Java version of LZMA SDK was included + + +4.30 2005-11-20 +------------------------- +- Compression ratio was improved in -a2 mode +- Speed optimizations for compressing in -a2 mode +- -fb switch now supports values up to 273 +- The bug in 7z_C (7zIn.c) was fixed: + It used Alloc/Free functions from different memory pools. + So if program used two memory pools, it worked incorrectly. +- 7z_C: .7z format supporting was improved +- LZMA# SDK (C#.NET version) was included + + +4.27 (Updated) 2005-09-21 +------------------------- +- Some GUIDs/interfaces in C++ were changed. + IStream.h: + ISequentialInStream::Read now works as old ReadPart + ISequentialOutStream::Write now works as old WritePart + + +4.27 2005-08-07 +------------------------- +- The bug in LzmaDecodeSize.c was fixed: + if _LZMA_IN_CB and _LZMA_OUT_READ were defined, + decompressing worked incorrectly. + + +4.26 2005-08-05 +------------------------- +- Fixes in 7z_C code and LzmaTest.c: + previous versions could work incorrectly, + if malloc(0) returns 0 + + +4.23 2005-06-29 +------------------------- +- Small fixes in C++ code + + +4.22 2005-06-10 +------------------------- +- Small fixes + + +4.21 2005-06-08 +------------------------- +- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed +- New additional version of ANSI-C LZMA Decoder with zlib-like interface: + - LzmaStateDecode.h + - LzmaStateDecode.c + - LzmaStateTest.c +- ANSI-C LZMA Decoder now can decompress files larger than 4 GB + + +4.17 2005-04-18 +------------------------- +- New example for RAM->RAM compressing/decompressing: + LZMA + BCJ (filter for x86 code): + - LzmaRam.h + - LzmaRam.cpp + - LzmaRamDecode.h + - LzmaRamDecode.c + - -f86 switch for lzma.exe + + +4.16 2005-03-29 +------------------------- +- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder): + If _LZMA_OUT_READ was defined, and if encoded stream was corrupted, + decoder could access memory outside of allocated range. +- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster). + Old version of LZMA Decoder now is in file LzmaDecodeSize.c. + LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c +- Small speed optimization in LZMA C++ code +- filter for SPARC's code was added +- Simplified version of .7z ANSI-C Decoder was included + + +4.06 2004-09-05 +------------------------- +- The bug in v4.05 was fixed: + LZMA-Encoder didn't release output stream in some cases. + + +4.05 2004-08-25 +------------------------- +- Source code of filters for x86, IA-64, ARM, ARM-Thumb + and PowerPC code was included to SDK +- Some internal minor changes + + +4.04 2004-07-28 +------------------------- +- More compatibility with some C++ compilers + + +4.03 2004-06-18 +------------------------- +- "Benchmark" command was added. It measures compressing + and decompressing speed and shows rating values. + Also it checks hardware errors. + + +4.02 2004-06-10 +------------------------- +- C++ LZMA Encoder/Decoder code now is more portable + and it can be compiled by GCC on Linux. + + +4.01 2004-02-15 +------------------------- +- Some detection of data corruption was enabled. + LzmaDecode.c / RangeDecoderReadByte + ..... + { + rd->ExtraBytes = 1; + return 0xFF; + } + + +4.00 2004-02-13 +------------------------- +- Original version of LZMA SDK + + + +HISTORY of the LZMA +------------------- + 2001-2008: Improvements to LZMA compressing/decompressing code, + keeping compatibility with original LZMA format + 1996-2001: Development of LZMA compression format + + Some milestones: + + 2001-08-30: LZMA compression was added to 7-Zip + 1999-01-02: First version of 7-Zip was released + + +End of document diff --git a/other-licenses/7zstub/src/DOC/lzma-sdk.txt b/other-licenses/7zstub/src/DOC/lzma-sdk.txt new file mode 100644 index 0000000000..01521e9398 --- /dev/null +++ b/other-licenses/7zstub/src/DOC/lzma-sdk.txt @@ -0,0 +1,357 @@ +LZMA SDK 18.05 +-------------- + +LZMA SDK provides the documentation, samples, header files, +libraries, and tools you need to develop applications that +use 7z / LZMA / LZMA2 / XZ compression. + +LZMA is an improved version of famous LZ77 compression algorithm. +It was improved in way of maximum increasing of compression ratio, +keeping high decompression speed and low memory requirements for +decompressing. + +LZMA2 is a LZMA based compression method. LZMA2 provides better +multithreading support for compression than LZMA and some other improvements. + +7z is a file format for data compression and file archiving. +7z is a main file format for 7-Zip compression program (www.7-zip.org). +7z format supports different compression methods: LZMA, LZMA2 and others. +7z also supports AES-256 based encryption. + +XZ is a file format for data compression that uses LZMA2 compression. +XZ format provides additional features: SHA/CRC check, filters for +improved compression ratio, splitting to blocks and streams, + + + +LICENSE +------- + +LZMA SDK is written and placed in the public domain by Igor Pavlov. + +Some code in LZMA SDK is based on public domain code from another developers: + 1) PPMd var.H (2001): Dmitry Shkarin + 2) SHA-256: Wei Dai (Crypto++ library) + +Anyone is free to copy, modify, publish, use, compile, sell, or distribute the +original LZMA SDK code, either in source code form or as a compiled binary, for +any purpose, commercial or non-commercial, and by any means. + +LZMA SDK code is compatible with open source licenses, for example, you can +include it to GNU GPL or GNU LGPL code. + + +LZMA SDK Contents +----------------- + + Source code: + + - C / C++ / C# / Java - LZMA compression and decompression + - C / C++ - LZMA2 compression and decompression + - C / C++ - XZ compression and decompression + - C - 7z decompression + - C++ - 7z compression and decompression + - C - small SFXs for installers (7z decompression) + - C++ - SFXs and SFXs for installers (7z decompression) + + Precomiled binaries: + + - console programs for lzma / 7z / xz compression and decompression + - SFX modules for installers. + + +UNIX/Linux version +------------------ +To compile C++ version of file->file LZMA encoding, go to directory +CPP/7zip/Bundles/LzmaCon +and call make to recompile it: + make -f makefile.gcc clean all + +In some UNIX/Linux versions you must compile LZMA with static libraries. +To compile with static libraries, you can use +LIB = -lm -static + +Also you can use p7zip (port of 7-Zip for POSIX systems like Unix or Linux): + + http://p7zip.sourceforge.net/ + + +Files +----- + +DOC/7zC.txt - 7z ANSI-C Decoder description +DOC/7zFormat.txt - 7z Format description +DOC/installer.txt - information about 7-Zip for installers +DOC/lzma.txt - LZMA compression description +DOC/lzma-sdk.txt - LZMA SDK description (this file) +DOC/lzma-history.txt - history of LZMA SDK +DOC/lzma-specification.txt - Specification of LZMA +DOC/Methods.txt - Compression method IDs for .7z + +bin/installer/ - example script to create installer that uses SFX module, + +bin/7zdec.exe - simplified 7z archive decoder +bin/7zr.exe - 7-Zip console program (reduced version) +bin/x64/7zr.exe - 7-Zip console program (reduced version) (x64 version) +bin/lzma.exe - file->file LZMA encoder/decoder for Windows +bin/7zS2.sfx - small SFX module for installers (GUI version) +bin/7zS2con.sfx - small SFX module for installers (Console version) +bin/7zSD.sfx - SFX module for installers. + + +7zDec.exe +--------- +7zDec.exe is simplified 7z archive decoder. +It supports only LZMA, LZMA2, and PPMd methods. +7zDec decodes whole solid block from 7z archive to RAM. +The RAM consumption can be high. + + + + +Source code structure +--------------------- + + +Asm/ - asm files (optimized code for CRC calculation and Intel-AES encryption) + +C/ - C files (compression / decompression and other) + Util/ + 7z - 7z decoder program (decoding 7z files) + Lzma - LZMA program (file->file LZMA encoder/decoder). + LzmaLib - LZMA library (.DLL for Windows) + SfxSetup - small SFX module for installers + +CPP/ -- CPP files + + Common - common files for C++ projects + Windows - common files for Windows related code + + 7zip - files related to 7-Zip + + Archive - files related to archiving + + Common - common files for archive handling + 7z - 7z C++ Encoder/Decoder + + Bundles - Modules that are bundles of other modules (files) + + Alone7z - 7zr.exe: Standalone 7-Zip console program (reduced version) + Format7zExtractR - 7zxr.dll: Reduced version of 7z DLL: extracting from 7z/LZMA/BCJ/BCJ2. + Format7zR - 7zr.dll: Reduced version of 7z DLL: extracting/compressing to 7z/LZMA/BCJ/BCJ2 + LzmaCon - lzma.exe: LZMA compression/decompression + LzmaSpec - example code for LZMA Specification + SFXCon - 7zCon.sfx: Console 7z SFX module + SFXSetup - 7zS.sfx: 7z SFX module for installers + SFXWin - 7z.sfx: GUI 7z SFX module + + Common - common files for 7-Zip + + Compress - files for compression/decompression + + Crypto - files for encryption / decompression + + UI - User Interface files + + Client7z - Test application for 7za.dll, 7zr.dll, 7zxr.dll + Common - Common UI files + Console - Code for console program (7z.exe) + Explorer - Some code from 7-Zip Shell extension + FileManager - Some GUI code from 7-Zip File Manager + GUI - Some GUI code from 7-Zip + + +CS/ - C# files + 7zip + Common - some common files for 7-Zip + Compress - files related to compression/decompression + LZ - files related to LZ (Lempel-Ziv) compression algorithm + LZMA - LZMA compression/decompression + LzmaAlone - file->file LZMA compression/decompression + RangeCoder - Range Coder (special code of compression/decompression) + +Java/ - Java files + SevenZip + Compression - files related to compression/decompression + LZ - files related to LZ (Lempel-Ziv) compression algorithm + LZMA - LZMA compression/decompression + RangeCoder - Range Coder (special code of compression/decompression) + + +Note: + Asm / C / C++ source code of LZMA SDK is part of 7-Zip's source code. + 7-Zip's source code can be downloaded from 7-Zip's SourceForge page: + + http://sourceforge.net/projects/sevenzip/ + + + +LZMA features +------------- + - Variable dictionary size (up to 1 GB) + - Estimated compressing speed: about 2 MB/s on 2 GHz CPU + - Estimated decompressing speed: + - 20-30 MB/s on modern 2 GHz cpu + - 1-2 MB/s on 200 MHz simple RISC cpu: (ARM, MIPS, PowerPC) + - Small memory requirements for decompressing (16 KB + DictionarySize) + - Small code size for decompressing: 5-8 KB + +LZMA decoder uses only integer operations and can be +implemented in any modern 32-bit CPU (or on 16-bit CPU with some conditions). + +Some critical operations that affect the speed of LZMA decompression: + 1) 32*16 bit integer multiply + 2) Mispredicted branches (penalty mostly depends from pipeline length) + 3) 32-bit shift and arithmetic operations + +The speed of LZMA decompressing mostly depends from CPU speed. +Memory speed has no big meaning. But if your CPU has small data cache, +overall weight of memory speed will slightly increase. + + +How To Use +---------- + +Using LZMA encoder/decoder executable +-------------------------------------- + +Usage: LZMA inputFile outputFile [...] + + e: encode file + + d: decode file + + b: Benchmark. There are two tests: compressing and decompressing + with LZMA method. Benchmark shows rating in MIPS (million + instructions per second). Rating value is calculated from + measured speed and it is normalized with Intel's Core 2 results. + Also Benchmark checks possible hardware errors (RAM + errors in most cases). Benchmark uses these settings: + (-a1, -d21, -fb32, -mfbt4). You can change only -d parameter. + Also you can change the number of iterations. Example for 30 iterations: + LZMA b 30 + Default number of iterations is 10. + + + + + -a{N}: set compression mode 0 = fast, 1 = normal + default: 1 (normal) + + d{N}: Sets Dictionary size - [0, 30], default: 23 (8MB) + The maximum value for dictionary size is 1 GB = 2^30 bytes. + Dictionary size is calculated as DictionarySize = 2^N bytes. + For decompressing file compressed by LZMA method with dictionary + size D = 2^N you need about D bytes of memory (RAM). + + -fb{N}: set number of fast bytes - [5, 273], default: 128 + Usually big number gives a little bit better compression ratio + and slower compression process. + + -lc{N}: set number of literal context bits - [0, 8], default: 3 + Sometimes lc=4 gives gain for big files. + + -lp{N}: set number of literal pos bits - [0, 4], default: 0 + lp switch is intended for periodical data when period is + equal 2^N. For example, for 32-bit (4 bytes) + periodical data you can use lp=2. Often it's better to set lc0, + if you change lp switch. + + -pb{N}: set number of pos bits - [0, 4], default: 2 + pb switch is intended for periodical data + when period is equal 2^N. + + -mf{MF_ID}: set Match Finder. Default: bt4. + Algorithms from hc* group doesn't provide good compression + ratio, but they often works pretty fast in combination with + fast mode (-a0). + + Memory requirements depend from dictionary size + (parameter "d" in table below). + + MF_ID Memory Description + + bt2 d * 9.5 + 4MB Binary Tree with 2 bytes hashing. + bt3 d * 11.5 + 4MB Binary Tree with 3 bytes hashing. + bt4 d * 11.5 + 4MB Binary Tree with 4 bytes hashing. + hc4 d * 7.5 + 4MB Hash Chain with 4 bytes hashing. + + -eos: write End Of Stream marker. By default LZMA doesn't write + eos marker, since LZMA decoder knows uncompressed size + stored in .lzma file header. + + -si: Read data from stdin (it will write End Of Stream marker). + -so: Write data to stdout + + +Examples: + +1) LZMA e file.bin file.lzma -d16 -lc0 + +compresses file.bin to file.lzma with 64 KB dictionary (2^16=64K) +and 0 literal context bits. -lc0 allows to reduce memory requirements +for decompression. + + +2) LZMA e file.bin file.lzma -lc0 -lp2 + +compresses file.bin to file.lzma with settings suitable +for 32-bit periodical data (for example, ARM or MIPS code). + +3) LZMA d file.lzma file.bin + +decompresses file.lzma to file.bin. + + +Compression ratio hints +----------------------- + +Recommendations +--------------- + +To increase the compression ratio for LZMA compressing it's desirable +to have aligned data (if it's possible) and also it's desirable to locate +data in such order, where code is grouped in one place and data is +grouped in other place (it's better than such mixing: code, data, code, +data, ...). + + +Filters +------- +You can increase the compression ratio for some data types, using +special filters before compressing. For example, it's possible to +increase the compression ratio on 5-10% for code for those CPU ISAs: +x86, IA-64, ARM, ARM-Thumb, PowerPC, SPARC. + +You can find C source code of such filters in C/Bra*.* files + +You can check the compression ratio gain of these filters with such +7-Zip commands (example for ARM code): +No filter: + 7z a a1.7z a.bin -m0=lzma + +With filter for little-endian ARM code: + 7z a a2.7z a.bin -m0=arm -m1=lzma + +It works in such manner: +Compressing = Filter_encoding + LZMA_encoding +Decompressing = LZMA_decoding + Filter_decoding + +Compressing and decompressing speed of such filters is very high, +so it will not increase decompressing time too much. +Moreover, it reduces decompression time for LZMA_decoding, +since compression ratio with filtering is higher. + +These filters convert CALL (calling procedure) instructions +from relative offsets to absolute addresses, so such data becomes more +compressible. + +For some ISAs (for example, for MIPS) it's impossible to get gain from such filter. + + + +--- + +http://www.7-zip.org +http://www.7-zip.org/sdk.html +http://www.7-zip.org/support.html diff --git a/other-licenses/7zstub/src/DOC/lzma-specification.txt b/other-licenses/7zstub/src/DOC/lzma-specification.txt new file mode 100644 index 0000000000..b6796df756 --- /dev/null +++ b/other-licenses/7zstub/src/DOC/lzma-specification.txt @@ -0,0 +1,1176 @@ +LZMA specification (DRAFT version) +---------------------------------- + +Author: Igor Pavlov +Date: 2015-06-14 + +This specification defines the format of LZMA compressed data and lzma file format. + +Notation +-------- + +We use the syntax of C++ programming language. +We use the following types in C++ code: + unsigned - unsigned integer, at least 16 bits in size + int - signed integer, at least 16 bits in size + UInt64 - 64-bit unsigned integer + UInt32 - 32-bit unsigned integer + UInt16 - 16-bit unsigned integer + Byte - 8-bit unsigned integer + bool - boolean type with two possible values: false, true + + +lzma file format +================ + +The lzma file contains the raw LZMA stream and the header with related properties. + +The files in that format use ".lzma" extension. + +The lzma file format layout: + +Offset Size Description + + 0 1 LZMA model properties (lc, lp, pb) in encoded form + 1 4 Dictionary size (32-bit unsigned integer, little-endian) + 5 8 Uncompressed size (64-bit unsigned integer, little-endian) + 13 Compressed data (LZMA stream) + +LZMA properties: + + name Range Description + + lc [0, 8] the number of "literal context" bits + lp [0, 4] the number of "literal pos" bits + pb [0, 4] the number of "pos" bits +dictSize [0, 2^32 - 1] the dictionary size + +The following code encodes LZMA properties: + +void EncodeProperties(Byte *properties) +{ + properties[0] = (Byte)((pb * 5 + lp) * 9 + lc); + Set_UInt32_LittleEndian(properties + 1, dictSize); +} + +If the value of dictionary size in properties is smaller than (1 << 12), +the LZMA decoder must set the dictionary size variable to (1 << 12). + +#define LZMA_DIC_MIN (1 << 12) + + unsigned lc, pb, lp; + UInt32 dictSize; + UInt32 dictSizeInProperties; + + void DecodeProperties(const Byte *properties) + { + unsigned d = properties[0]; + if (d >= (9 * 5 * 5)) + throw "Incorrect LZMA properties"; + lc = d % 9; + d /= 9; + pb = d / 5; + lp = d % 5; + dictSizeInProperties = 0; + for (int i = 0; i < 4; i++) + dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i); + dictSize = dictSizeInProperties; + if (dictSize < LZMA_DIC_MIN) + dictSize = LZMA_DIC_MIN; + } + +If "Uncompressed size" field contains ones in all 64 bits, it means that +uncompressed size is unknown and there is the "end marker" in stream, +that indicates the end of decoding point. +In opposite case, if the value from "Uncompressed size" field is not +equal to ((2^64) - 1), the LZMA stream decoding must be finished after +specified number of bytes (Uncompressed size) is decoded. And if there +is the "end marker", the LZMA decoder must read that marker also. + + +The new scheme to encode LZMA properties +---------------------------------------- + +If LZMA compression is used for some another format, it's recommended to +use a new improved scheme to encode LZMA properties. That new scheme was +used in xz format that uses the LZMA2 compression algorithm. +The LZMA2 is a new compression algorithm that is based on the LZMA algorithm. + +The dictionary size in LZMA2 is encoded with just one byte and LZMA2 supports +only reduced set of dictionary sizes: + (2 << 11), (3 << 11), + (2 << 12), (3 << 12), + ... + (2 << 30), (3 << 30), + (2 << 31) - 1 + +The dictionary size can be extracted from encoded value with the following code: + + dictSize = (p == 40) ? 0xFFFFFFFF : (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)); + +Also there is additional limitation (lc + lp <= 4) in LZMA2 for values of +"lc" and "lp" properties: + + if (lc + lp > 4) + throw "Unsupported properties: (lc + lp) > 4"; + +There are some advantages for LZMA decoder with such (lc + lp) value +limitation. It reduces the maximum size of tables allocated by decoder. +And it reduces the complexity of initialization procedure, that can be +important to keep high speed of decoding of big number of small LZMA streams. + +It's recommended to use that limitation (lc + lp <= 4) for any new format +that uses LZMA compression. Note that the combinations of "lc" and "lp" +parameters, where (lc + lp > 4), can provide significant improvement in +compression ratio only in some rare cases. + +The LZMA properties can be encoded into two bytes in new scheme: + +Offset Size Description + + 0 1 The dictionary size encoded with LZMA2 scheme + 1 1 LZMA model properties (lc, lp, pb) in encoded form + + +The RAM usage +============= + +The RAM usage for LZMA decoder is determined by the following parts: + +1) The Sliding Window (from 4 KiB to 4 GiB). +2) The probability model counter arrays (arrays of 16-bit variables). +3) Some additional state variables (about 10 variables of 32-bit integers). + + +The RAM usage for Sliding Window +-------------------------------- + +There are two main scenarios of decoding: + +1) The decoding of full stream to one RAM buffer. + + If we decode full LZMA stream to one output buffer in RAM, the decoder + can use that output buffer as sliding window. So the decoder doesn't + need additional buffer allocated for sliding window. + +2) The decoding to some external storage. + + If we decode LZMA stream to external storage, the decoder must allocate + the buffer for sliding window. The size of that buffer must be equal + or larger than the value of dictionary size from properties of LZMA stream. + +In this specification we describe the code for decoding to some external +storage. The optimized version of code for decoding of full stream to one +output RAM buffer can require some minor changes in code. + + +The RAM usage for the probability model counters +------------------------------------------------ + +The size of the probability model counter arrays is calculated with the +following formula: + +size_of_prob_arrays = 1846 + 768 * (1 << (lp + lc)) + +Each probability model counter is 11-bit unsigned integer. +If we use 16-bit integer variables (2-byte integers) for these probability +model counters, the RAM usage required by probability model counter arrays +can be estimated with the following formula: + + RAM = 4 KiB + 1.5 KiB * (1 << (lp + lc)) + +For example, for default LZMA parameters (lp = 0 and lc = 3), the RAM usage is + + RAM_lc3_lp0 = 4 KiB + 1.5 KiB * 8 = 16 KiB + +The maximum RAM state usage is required for decoding the stream with lp = 4 +and lc = 8: + + RAM_lc8_lp4 = 4 KiB + 1.5 KiB * 4096 = 6148 KiB + +If the decoder uses LZMA2's limited property condition +(lc + lp <= 4), the RAM usage will be not larger than + + RAM_lc_lp_4 = 4 KiB + 1.5 KiB * 16 = 28 KiB + + +The RAM usage for encoder +------------------------- + +There are many variants for LZMA encoding code. +These variants have different values for memory consumption. +Note that memory consumption for LZMA Encoder can not be +smaller than memory consumption of LZMA Decoder for same stream. + +The RAM usage required by modern effective implementation of +LZMA Encoder can be estimated with the following formula: + + Encoder_RAM_Usage = 4 MiB + 11 * dictionarySize. + +But there are some modes of the encoder that require less memory. + + +LZMA Decoding +============= + +The LZMA compression algorithm uses LZ-based compression with Sliding Window +and Range Encoding as entropy coding method. + + +Sliding Window +-------------- + +LZMA uses Sliding Window compression similar to LZ77 algorithm. + +LZMA stream must be decoded to the sequence that consists +of MATCHES and LITERALS: + + - a LITERAL is a 8-bit character (one byte). + The decoder just puts that LITERAL to the uncompressed stream. + + - a MATCH is a pair of two numbers (DISTANCE-LENGTH pair). + The decoder takes one byte exactly "DISTANCE" characters behind + current position in the uncompressed stream and puts it to + uncompressed stream. The decoder must repeat it "LENGTH" times. + +The "DISTANCE" can not be larger than dictionary size. +And the "DISTANCE" can not be larger than the number of bytes in +the uncompressed stream that were decoded before that match. + +In this specification we use cyclic buffer to implement Sliding Window +for LZMA decoder: + +class COutWindow +{ + Byte *Buf; + UInt32 Pos; + UInt32 Size; + bool IsFull; + +public: + unsigned TotalPos; + COutStream OutStream; + + COutWindow(): Buf(NULL) {} + ~COutWindow() { delete []Buf; } + + void Create(UInt32 dictSize) + { + Buf = new Byte[dictSize]; + Pos = 0; + Size = dictSize; + IsFull = false; + TotalPos = 0; + } + + void PutByte(Byte b) + { + TotalPos++; + Buf[Pos++] = b; + if (Pos == Size) + { + Pos = 0; + IsFull = true; + } + OutStream.WriteByte(b); + } + + Byte GetByte(UInt32 dist) const + { + return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos]; + } + + void CopyMatch(UInt32 dist, unsigned len) + { + for (; len > 0; len--) + PutByte(GetByte(dist)); + } + + bool CheckDistance(UInt32 dist) const + { + return dist <= Pos || IsFull; + } + + bool IsEmpty() const + { + return Pos == 0 && !IsFull; + } +}; + + +In another implementation it's possible to use one buffer that contains +Sliding Window and the whole data stream after uncompressing. + + +Range Decoder +------------- + +LZMA algorithm uses Range Encoding (1) as entropy coding method. + +LZMA stream contains just one very big number in big-endian encoding. +LZMA decoder uses the Range Decoder to extract a sequence of binary +symbols from that big number. + +The state of the Range Decoder: + +struct CRangeDecoder +{ + UInt32 Range; + UInt32 Code; + InputStream *InStream; + + bool Corrupted; +} + +The notes about UInt32 type for the "Range" and "Code" variables: + + It's possible to use 64-bit (unsigned or signed) integer type + for the "Range" and the "Code" variables instead of 32-bit unsigned, + but some additional code must be used to truncate the values to + low 32-bits after some operations. + + If the programming language does not support 32-bit unsigned integer type + (like in case of JAVA language), it's possible to use 32-bit signed integer, + but some code must be changed. For example, it's required to change the code + that uses comparison operations for UInt32 variables in this specification. + +The Range Decoder can be in some states that can be treated as +"Corruption" in LZMA stream. The Range Decoder uses the variable "Corrupted": + + (Corrupted == false), if the Range Decoder has not detected any corruption. + (Corrupted == true), if the Range Decoder has detected some corruption. + +The reference LZMA Decoder ignores the value of the "Corrupted" variable. +So it continues to decode the stream, even if the corruption can be detected +in the Range Decoder. To provide the full compatibility with output of the +reference LZMA Decoder, another LZMA Decoder implementations must also +ignore the value of the "Corrupted" variable. + +The LZMA Encoder is required to create only such LZMA streams, that will not +lead the Range Decoder to states, where the "Corrupted" variable is set to true. + +The Range Decoder reads first 5 bytes from input stream to initialize +the state: + +bool CRangeDecoder::Init() +{ + Corrupted = false; + Range = 0xFFFFFFFF; + Code = 0; + + Byte b = InStream->ReadByte(); + + for (int i = 0; i < 4; i++) + Code = (Code << 8) | InStream->ReadByte(); + + if (b != 0 || Code == Range) + Corrupted = true; + return b == 0; +} + +The LZMA Encoder always writes ZERO in initial byte of compressed stream. +That scheme allows to simplify the code of the Range Encoder in the +LZMA Encoder. If initial byte is not equal to ZERO, the LZMA Decoder must +stop decoding and report error. + +After the last bit of data was decoded by Range Decoder, the value of the +"Code" variable must be equal to 0. The LZMA Decoder must check it by +calling the IsFinishedOK() function: + + bool IsFinishedOK() const { return Code == 0; } + +If there is corruption in data stream, there is big probability that +the "Code" value will be not equal to 0 in the Finish() function. So that +check in the IsFinishedOK() function provides very good feature for +corruption detection. + +The value of the "Range" variable before each bit decoding can not be smaller +than ((UInt32)1 << 24). The Normalize() function keeps the "Range" value in +described range. + +#define kTopValue ((UInt32)1 << 24) + +void CRangeDecoder::Normalize() +{ + if (Range < kTopValue) + { + Range <<= 8; + Code = (Code << 8) | InStream->ReadByte(); + } +} + +Notes: if the size of the "Code" variable is larger than 32 bits, it's +required to keep only low 32 bits of the "Code" variable after the change +in Normalize() function. + +If the LZMA Stream is not corrupted, the value of the "Code" variable is +always smaller than value of the "Range" variable. +But the Range Decoder ignores some types of corruptions, so the value of +the "Code" variable can be equal or larger than value of the "Range" variable +for some "Corrupted" archives. + + +LZMA uses Range Encoding only with binary symbols of two types: + 1) binary symbols with fixed and equal probabilities (direct bits) + 2) binary symbols with predicted probabilities + +The DecodeDirectBits() function decodes the sequence of direct bits: + +UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits) +{ + UInt32 res = 0; + do + { + Range >>= 1; + Code -= Range; + UInt32 t = 0 - ((UInt32)Code >> 31); + Code += Range & t; + + if (Code == Range) + Corrupted = true; + + Normalize(); + res <<= 1; + res += t + 1; + } + while (--numBits); + return res; +} + + +The Bit Decoding with Probability Model +--------------------------------------- + +The task of Bit Probability Model is to estimate probabilities of binary +symbols. And then it provides the Range Decoder with that information. +The better prediction provides better compression ratio. +The Bit Probability Model uses statistical data of previous decoded +symbols. + +That estimated probability is presented as 11-bit unsigned integer value +that represents the probability of symbol "0". + +#define kNumBitModelTotalBits 11 + +Mathematical probabilities can be presented with the following formulas: + probability(symbol_0) = prob / 2048. + probability(symbol_1) = 1 - Probability(symbol_0) = + = 1 - prob / 2048 = + = (2048 - prob) / 2048 +where the "prob" variable contains 11-bit integer probability counter. + +It's recommended to use 16-bit unsigned integer type, to store these 11-bit +probability values: + +typedef UInt16 CProb; + +Each probability value must be initialized with value ((1 << 11) / 2), +that represents the state, where probabilities of symbols 0 and 1 +are equal to 0.5: + +#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2) + +The INIT_PROBS macro is used to initialize the array of CProb variables: + +#define INIT_PROBS(p) \ + { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; } + + +The DecodeBit() function decodes one bit. +The LZMA decoder provides the pointer to CProb variable that contains +information about estimated probability for symbol 0 and the Range Decoder +updates that CProb variable after decoding. The Range Decoder increases +estimated probability of the symbol that was decoded: + +#define kNumMoveBits 5 + +unsigned CRangeDecoder::DecodeBit(CProb *prob) +{ + unsigned v = *prob; + UInt32 bound = (Range >> kNumBitModelTotalBits) * v; + unsigned symbol; + if (Code < bound) + { + v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits; + Range = bound; + symbol = 0; + } + else + { + v -= v >> kNumMoveBits; + Code -= bound; + Range -= bound; + symbol = 1; + } + *prob = (CProb)v; + Normalize(); + return symbol; +} + + +The Binary Tree of bit model counters +------------------------------------- + +LZMA uses a tree of Bit model variables to decode symbol that needs +several bits for storing. There are two versions of such trees in LZMA: + 1) the tree that decodes bits from high bit to low bit (the normal scheme). + 2) the tree that decodes bits from low bit to high bit (the reverse scheme). + +Each binary tree structure supports different size of decoded symbol +(the size of binary sequence that contains value of symbol). +If that size of decoded symbol is "NumBits" bits, the tree structure +uses the array of (2 << NumBits) counters of CProb type. +But only ((2 << NumBits) - 1) items are used by encoder and decoder. +The first item (the item with index equal to 0) in array is unused. +That scheme with unused array's item allows to simplify the code. + +unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc) +{ + unsigned m = 1; + unsigned symbol = 0; + for (unsigned i = 0; i < numBits; i++) + { + unsigned bit = rc->DecodeBit(&probs[m]); + m <<= 1; + m += bit; + symbol |= (bit << i); + } + return symbol; +} + +template +class CBitTreeDecoder +{ + CProb Probs[(unsigned)1 << NumBits]; + +public: + + void Init() + { + INIT_PROBS(Probs); + } + + unsigned Decode(CRangeDecoder *rc) + { + unsigned m = 1; + for (unsigned i = 0; i < NumBits; i++) + m = (m << 1) + rc->DecodeBit(&Probs[m]); + return m - ((unsigned)1 << NumBits); + } + + unsigned ReverseDecode(CRangeDecoder *rc) + { + return BitTreeReverseDecode(Probs, NumBits, rc); + } +}; + + +LZ part of LZMA +--------------- + +LZ part of LZMA describes details about the decoding of MATCHES and LITERALS. + + +The Literal Decoding +-------------------- + +The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where +each table contains 0x300 CProb values: + + CProb *LitProbs; + + void CreateLiterals() + { + LitProbs = new CProb[(UInt32)0x300 << (lc + lp)]; + } + + void InitLiterals() + { + UInt32 num = (UInt32)0x300 << (lc + lp); + for (UInt32 i = 0; i < num; i++) + LitProbs[i] = PROB_INIT_VAL; + } + +To select the table for decoding it uses the context that consists of +(lc) high bits from previous literal and (lp) low bits from value that +represents current position in outputStream. + +If (State > 7), the Literal Decoder also uses "matchByte" that represents +the byte in OutputStream at position the is the DISTANCE bytes before +current position, where the DISTANCE is the distance in DISTANCE-LENGTH pair +of latest decoded match. + +The following code decodes one literal and puts it to Sliding Window buffer: + + void DecodeLiteral(unsigned state, UInt32 rep0) + { + unsigned prevByte = 0; + if (!OutWindow.IsEmpty()) + prevByte = OutWindow.GetByte(1); + + unsigned symbol = 1; + unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc)); + CProb *probs = &LitProbs[(UInt32)0x300 * litState]; + + if (state >= 7) + { + unsigned matchByte = OutWindow.GetByte(rep0 + 1); + do + { + unsigned matchBit = (matchByte >> 7) & 1; + matchByte <<= 1; + unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]); + symbol = (symbol << 1) | bit; + if (matchBit != bit) + break; + } + while (symbol < 0x100); + } + while (symbol < 0x100) + symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]); + OutWindow.PutByte((Byte)(symbol - 0x100)); + } + + +The match length decoding +------------------------- + +The match length decoder returns normalized (zero-based value) +length of match. That value can be converted to real length of the match +with the following code: + +#define kMatchMinLen 2 + + matchLen = len + kMatchMinLen; + +The match length decoder can return the values from 0 to 271. +And the corresponded real match length values can be in the range +from 2 to 273. + +The following scheme is used for the match length encoding: + + Binary encoding Binary Tree structure Zero-based match length + sequence (binary + decimal): + + 0 xxx LowCoder[posState] xxx + 1 0 yyy MidCoder[posState] yyy + 8 + 1 1 zzzzzzzz HighCoder zzzzzzzz + 16 + +LZMA uses bit model variable "Choice" to decode the first selection bit. + +If the first selection bit is equal to 0, the decoder uses binary tree + LowCoder[posState] to decode 3-bit zero-based match length (xxx). + +If the first selection bit is equal to 1, the decoder uses bit model + variable "Choice2" to decode the second selection bit. + + If the second selection bit is equal to 0, the decoder uses binary tree + MidCoder[posState] to decode 3-bit "yyy" value, and zero-based match + length is equal to (yyy + 8). + + If the second selection bit is equal to 1, the decoder uses binary tree + HighCoder to decode 8-bit "zzzzzzzz" value, and zero-based + match length is equal to (zzzzzzzz + 16). + +LZMA uses "posState" value as context to select the binary tree +from LowCoder and MidCoder binary tree arrays: + + unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1); + +The full code of the length decoder: + +class CLenDecoder +{ + CProb Choice; + CProb Choice2; + CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax]; + CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax]; + CBitTreeDecoder<8> HighCoder; + +public: + + void Init() + { + Choice = PROB_INIT_VAL; + Choice2 = PROB_INIT_VAL; + HighCoder.Init(); + for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++) + { + LowCoder[i].Init(); + MidCoder[i].Init(); + } + } + + unsigned Decode(CRangeDecoder *rc, unsigned posState) + { + if (rc->DecodeBit(&Choice) == 0) + return LowCoder[posState].Decode(rc); + if (rc->DecodeBit(&Choice2) == 0) + return 8 + MidCoder[posState].Decode(rc); + return 16 + HighCoder.Decode(rc); + } +}; + +The LZMA decoder uses two instances of CLenDecoder class. +The first instance is for the matches of "Simple Match" type, +and the second instance is for the matches of "Rep Match" type: + + CLenDecoder LenDecoder; + CLenDecoder RepLenDecoder; + + +The match distance decoding +--------------------------- + +LZMA supports dictionary sizes up to 4 GiB minus 1. +The value of match distance (decoded by distance decoder) can be +from 1 to 2^32. But the distance value that is equal to 2^32 is used to +indicate the "End of stream" marker. So real largest match distance +that is used for LZ-window match is (2^32 - 1). + +LZMA uses normalized match length (zero-based length) +to calculate the context state "lenState" do decode the distance value: + +#define kNumLenToPosStates 4 + + unsigned lenState = len; + if (lenState > kNumLenToPosStates - 1) + lenState = kNumLenToPosStates - 1; + +The distance decoder returns the "dist" value that is zero-based value +of match distance. The real match distance can be calculated with the +following code: + + matchDistance = dist + 1; + +The state of the distance decoder and the initialization code: + + #define kEndPosModelIndex 14 + #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + #define kNumAlignBits 4 + + CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates]; + CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex]; + CBitTreeDecoder AlignDecoder; + + void InitDist() + { + for (unsigned i = 0; i < kNumLenToPosStates; i++) + PosSlotDecoder[i].Init(); + AlignDecoder.Init(); + INIT_PROBS(PosDecoders); + } + +At first stage the distance decoder decodes 6-bit "posSlot" value with bit +tree decoder from PosSlotDecoder array. It's possible to get 2^6=64 different +"posSlot" values. + + unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); + +The encoding scheme for distance value is shown in the following table: + +posSlot (decimal) / + zero-based distance (binary) + 0 0 + 1 1 + 2 10 + 3 11 + + 4 10 x + 5 11 x + 6 10 xx + 7 11 xx + 8 10 xxx + 9 11 xxx +10 10 xxxx +11 11 xxxx +12 10 xxxxx +13 11 xxxxx + +14 10 yy zzzz +15 11 yy zzzz +16 10 yyy zzzz +17 11 yyy zzzz +... +62 10 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz +63 11 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz + +where + "x ... x" means the sequence of binary symbols encoded with binary tree and + "Reverse" scheme. It uses separated binary tree for each posSlot from 4 to 13. + "y" means direct bit encoded with range coder. + "zzzz" means the sequence of four binary symbols encoded with binary + tree with "Reverse" scheme, where one common binary tree "AlignDecoder" + is used for all posSlot values. + +If (posSlot < 4), the "dist" value is equal to posSlot value. + +If (posSlot >= 4), the decoder uses "posSlot" value to calculate the value of + the high bits of "dist" value and the number of the low bits. + + If (4 <= posSlot < kEndPosModelIndex), the decoder uses bit tree decoders. + (one separated bit tree decoder per one posSlot value) and "Reverse" scheme. + In this implementation we use one CProb array "PosDecoders" that contains + all CProb variables for all these bit decoders. + + if (posSlot >= kEndPosModelIndex), the middle bits are decoded as direct + bits from RangeDecoder and the low 4 bits are decoded with a bit tree + decoder "AlignDecoder" with "Reverse" scheme. + +The code to decode zero-based match distance: + + unsigned DecodeDistance(unsigned len) + { + unsigned lenState = len; + if (lenState > kNumLenToPosStates - 1) + lenState = kNumLenToPosStates - 1; + + unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); + if (posSlot < 4) + return posSlot; + + unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1); + UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits); + if (posSlot < kEndPosModelIndex) + dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec); + else + { + dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits; + dist += AlignDecoder.ReverseDecode(&RangeDec); + } + return dist; + } + + + +LZMA Decoding modes +------------------- + +There are 2 types of LZMA streams: + +1) The stream with "End of stream" marker. +2) The stream without "End of stream" marker. + +And the LZMA Decoder supports 3 modes of decoding: + +1) The unpack size is undefined. The LZMA decoder stops decoding after + getting "End of stream" marker. + The input variables for that case: + + markerIsMandatory = true + unpackSizeDefined = false + unpackSize contains any value + +2) The unpack size is defined and LZMA decoder supports both variants, + where the stream can contain "End of stream" marker or the stream is + finished without "End of stream" marker. The LZMA decoder must detect + any of these situations. + The input variables for that case: + + markerIsMandatory = false + unpackSizeDefined = true + unpackSize contains unpack size + +3) The unpack size is defined and the LZMA stream must contain + "End of stream" marker + The input variables for that case: + + markerIsMandatory = true + unpackSizeDefined = true + unpackSize contains unpack size + + +The main loop of decoder +------------------------ + +The main loop of LZMA decoder: + +Initialize the LZMA state. +loop +{ + // begin of loop + Check "end of stream" conditions. + Decode Type of MATCH / LITERAL. + If it's LITERAL, decode LITERAL value and put the LITERAL to Window. + If it's MATCH, decode the length of match and the match distance. + Check error conditions, check end of stream conditions and copy + the sequence of match bytes from sliding window to current position + in window. + Go to begin of loop +} + +The reference implementation of LZMA decoder uses "unpackSize" variable +to keep the number of remaining bytes in output stream. So it reduces +"unpackSize" value after each decoded LITERAL or MATCH. + +The following code contains the "end of stream" condition check at the start +of the loop: + + if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory) + if (RangeDec.IsFinishedOK()) + return LZMA_RES_FINISHED_WITHOUT_MARKER; + +LZMA uses three types of matches: + +1) "Simple Match" - the match with distance value encoded with bit models. + +2) "Rep Match" - the match that uses the distance from distance + history table. + +3) "Short Rep Match" - the match of single byte length, that uses the latest + distance from distance history table. + +The LZMA decoder keeps the history of latest 4 match distances that were used +by decoder. That set of 4 variables contains zero-based match distances and +these variables are initialized with zero values: + + UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0; + +The LZMA decoder uses binary model variables to select type of MATCH or LITERAL: + +#define kNumStates 12 +#define kNumPosBitsMax 4 + + CProb IsMatch[kNumStates << kNumPosBitsMax]; + CProb IsRep[kNumStates]; + CProb IsRepG0[kNumStates]; + CProb IsRepG1[kNumStates]; + CProb IsRepG2[kNumStates]; + CProb IsRep0Long[kNumStates << kNumPosBitsMax]; + +The decoder uses "state" variable value to select exact variable +from "IsRep", "IsRepG0", "IsRepG1" and "IsRepG2" arrays. +The "state" variable can get the value from 0 to 11. +Initial value for "state" variable is zero: + + unsigned state = 0; + +The "state" variable is updated after each LITERAL or MATCH with one of the +following functions: + +unsigned UpdateState_Literal(unsigned state) +{ + if (state < 4) return 0; + else if (state < 10) return state - 3; + else return state - 6; +} +unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; } +unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; } +unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; } + +The decoder calculates "state2" variable value to select exact variable from +"IsMatch" and "IsRep0Long" arrays: + +unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1); +unsigned state2 = (state << kNumPosBitsMax) + posState; + +The decoder uses the following code flow scheme to select exact +type of LITERAL or MATCH: + +IsMatch[state2] decode + 0 - the Literal + 1 - the Match + IsRep[state] decode + 0 - Simple Match + 1 - Rep Match + IsRepG0[state] decode + 0 - the distance is rep0 + IsRep0Long[state2] decode + 0 - Short Rep Match + 1 - Rep Match 0 + 1 - + IsRepG1[state] decode + 0 - Rep Match 1 + 1 - + IsRepG2[state] decode + 0 - Rep Match 2 + 1 - Rep Match 3 + + +LITERAL symbol +-------------- +If the value "0" was decoded with IsMatch[state2] decoding, we have "LITERAL" type. + +At first the LZMA decoder must check that it doesn't exceed +specified uncompressed size: + + if (unpackSizeDefined && unpackSize == 0) + return LZMA_RES_ERROR; + +Then it decodes literal value and puts it to sliding window: + + DecodeLiteral(state, rep0); + +Then the decoder must update the "state" value and "unpackSize" value; + + state = UpdateState_Literal(state); + unpackSize--; + +Then the decoder must go to the begin of main loop to decode next Match or Literal. + + +Simple Match +------------ + +If the value "1" was decoded with IsMatch[state2] decoding, +we have the "Simple Match" type. + +The distance history table is updated with the following scheme: + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + +The zero-based length is decoded with "LenDecoder": + + len = LenDecoder.Decode(&RangeDec, posState); + +The state is update with UpdateState_Match function: + + state = UpdateState_Match(state); + +and the new "rep0" value is decoded with DecodeDistance: + + rep0 = DecodeDistance(len); + +That "rep0" will be used as zero-based distance for current match. + +If the value of "rep0" is equal to 0xFFFFFFFF, it means that we have +"End of stream" marker, so we can stop decoding and check finishing +condition in Range Decoder: + + if (rep0 == 0xFFFFFFFF) + return RangeDec.IsFinishedOK() ? + LZMA_RES_FINISHED_WITH_MARKER : + LZMA_RES_ERROR; + +If uncompressed size is defined, LZMA decoder must check that it doesn't +exceed that specified uncompressed size: + + if (unpackSizeDefined && unpackSize == 0) + return LZMA_RES_ERROR; + +Also the decoder must check that "rep0" value is not larger than dictionary size +and is not larger than the number of already decoded bytes: + + if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0)) + return LZMA_RES_ERROR; + +Then the decoder must copy match bytes as described in +"The match symbols copying" section. + + +Rep Match +--------- + +If the LZMA decoder has decoded the value "1" with IsRep[state] variable, +we have "Rep Match" type. + +At first the LZMA decoder must check that it doesn't exceed +specified uncompressed size: + + if (unpackSizeDefined && unpackSize == 0) + return LZMA_RES_ERROR; + +Also the decoder must return error, if the LZ window is empty: + + if (OutWindow.IsEmpty()) + return LZMA_RES_ERROR; + +If the match type is "Rep Match", the decoder uses one of the 4 variables of +distance history table to get the value of distance for current match. +And there are 4 corresponding ways of decoding flow. + +The decoder updates the distance history with the following scheme +depending from type of match: + +- "Rep Match 0" or "Short Rep Match": + ; LZMA doesn't update the distance history + +- "Rep Match 1": + UInt32 dist = rep1; + rep1 = rep0; + rep0 = dist; + +- "Rep Match 2": + UInt32 dist = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = dist; + +- "Rep Match 3": + UInt32 dist = rep3; + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = dist; + +Then the decoder decodes exact subtype of "Rep Match" using "IsRepG0", "IsRep0Long", +"IsRepG1", "IsRepG2". + +If the subtype is "Short Rep Match", the decoder updates the state, puts +the one byte from window to current position in window and goes to next +MATCH/LITERAL symbol (the begin of main loop): + + state = UpdateState_ShortRep(state); + OutWindow.PutByte(OutWindow.GetByte(rep0 + 1)); + unpackSize--; + continue; + +In other cases (Rep Match 0/1/2/3), it decodes the zero-based +length of match with "RepLenDecoder" decoder: + + len = RepLenDecoder.Decode(&RangeDec, posState); + +Then it updates the state: + + state = UpdateState_Rep(state); + +Then the decoder must copy match bytes as described in +"The Match symbols copying" section. + + +The match symbols copying +------------------------- + +If we have the match (Simple Match or Rep Match 0/1/2/3), the decoder must +copy the sequence of bytes with calculated match distance and match length. +If uncompressed size is defined, LZMA decoder must check that it doesn't +exceed that specified uncompressed size: + + len += kMatchMinLen; + bool isError = false; + if (unpackSizeDefined && unpackSize < len) + { + len = (unsigned)unpackSize; + isError = true; + } + OutWindow.CopyMatch(rep0 + 1, len); + unpackSize -= len; + if (isError) + return LZMA_RES_ERROR; + +Then the decoder must go to the begin of main loop to decode next MATCH or LITERAL. + + + +NOTES +----- + +This specification doesn't describe the variant of decoder implementation +that supports partial decoding. Such partial decoding case can require some +changes in "end of stream" condition checks code. Also such code +can use additional status codes, returned by decoder. + +This specification uses C++ code with templates to simplify describing. +The optimized version of LZMA decoder doesn't need templates. +Such optimized version can use just two arrays of CProb variables: + 1) The dynamic array of CProb variables allocated for the Literal Decoder. + 2) The one common array that contains all other CProb variables. + + +References: + +1. G. N. N. Martin, Range encoding: an algorithm for removing redundancy + from a digitized message, Video & Data Recording Conference, + Southampton, UK, July 24-27, 1979. diff --git a/other-licenses/7zstub/src/DOC/lzma.txt b/other-licenses/7zstub/src/DOC/lzma.txt new file mode 100644 index 0000000000..1f92142ead --- /dev/null +++ b/other-licenses/7zstub/src/DOC/lzma.txt @@ -0,0 +1,328 @@ +LZMA compression +---------------- +Version: 9.35 + +This file describes LZMA encoding and decoding functions written in C language. + +LZMA is an improved version of famous LZ77 compression algorithm. +It was improved in way of maximum increasing of compression ratio, +keeping high decompression speed and low memory requirements for +decompressing. + +Note: you can read also LZMA Specification (lzma-specification.txt from LZMA SDK) + +Also you can look source code for LZMA encoding and decoding: + C/Util/Lzma/LzmaUtil.c + + +LZMA compressed file format +--------------------------- +Offset Size Description + 0 1 Special LZMA properties (lc,lp, pb in encoded form) + 1 4 Dictionary size (little endian) + 5 8 Uncompressed size (little endian). -1 means unknown size + 13 Compressed data + + + +ANSI-C LZMA Decoder +~~~~~~~~~~~~~~~~~~~ + +Please note that interfaces for ANSI-C code were changed in LZMA SDK 4.58. +If you want to use old interfaces you can download previous version of LZMA SDK +from sourceforge.net site. + +To use ANSI-C LZMA Decoder you need the following files: +1) LzmaDec.h + LzmaDec.c + 7zTypes.h + Precomp.h + Compiler.h + +Look example code: + C/Util/Lzma/LzmaUtil.c + + +Memory requirements for LZMA decoding +------------------------------------- + +Stack usage of LZMA decoding function for local variables is not +larger than 200-400 bytes. + +LZMA Decoder uses dictionary buffer and internal state structure. +Internal state structure consumes + state_size = (4 + (1.5 << (lc + lp))) KB +by default (lc=3, lp=0), state_size = 16 KB. + + +How To decompress data +---------------------- + +LZMA Decoder (ANSI-C version) now supports 2 interfaces: +1) Single-call Decompressing +2) Multi-call State Decompressing (zlib-like interface) + +You must use external allocator: +Example: +void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); } +void SzFree(void *p, void *address) { p = p; free(address); } +ISzAlloc alloc = { SzAlloc, SzFree }; + +You can use p = p; operator to disable compiler warnings. + + +Single-call Decompressing +------------------------- +When to use: RAM->RAM decompressing +Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h +Compile defines: no defines +Memory Requirements: + - Input buffer: compressed size + - Output buffer: uncompressed size + - LZMA Internal Structures: state_size (16 KB for default settings) + +Interface: + int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAlloc *alloc); + In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size + propData - LZMA properties (5 bytes) + propSize - size of propData buffer (5 bytes) + finishMode - It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). + You can use LZMA_FINISH_END, when you know that + current output buffer covers last bytes of stream. + alloc - Memory allocator. + + Out: + destLen - processed output size + srcLen - processed input size + + Output: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). + + If LZMA decoder sees end_marker before reaching output limit, it returns OK result, + and output value of destLen will be less than output buffer size limit. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + + +Multi-call State Decompressing (zlib-like interface) +---------------------------------------------------- + +When to use: file->file decompressing +Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h + +Memory Requirements: + - Buffer for input stream: any size (for example, 16 KB) + - Buffer for output stream: any size (for example, 16 KB) + - LZMA Internal Structures: state_size (16 KB for default settings) + - LZMA dictionary (dictionary size is encoded in LZMA properties header) + +1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header: + unsigned char header[LZMA_PROPS_SIZE + 8]; + ReadFile(inFile, header, sizeof(header) + +2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties + + CLzmaDec state; + LzmaDec_Constr(&state); + res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc); + if (res != SZ_OK) + return res; + +3) Init LzmaDec structure before any new LZMA stream. And call LzmaDec_DecodeToBuf in loop + + LzmaDec_Init(&state); + for (;;) + { + ... + int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode); + ... + } + + +4) Free all allocated structures + LzmaDec_Free(&state, &g_Alloc); + +Look example code: + C/Util/Lzma/LzmaUtil.c + + +How To compress data +-------------------- + +Compile files: + 7zTypes.h + Threads.h + LzmaEnc.h + LzmaEnc.c + LzFind.h + LzFind.c + LzFindMt.h + LzFindMt.c + LzHash.h + +Memory Requirements: + - (dictSize * 11.5 + 6 MB) + state_size + +Lzma Encoder can use two memory allocators: +1) alloc - for small arrays. +2) allocBig - for big arrays. + +For example, you can use Large RAM Pages (2 MB) in allocBig allocator for +better compression speed. Note that Windows has bad implementation for +Large RAM Pages. +It's OK to use same allocator for alloc and allocBig. + + +Single-call Compression with callbacks +-------------------------------------- + +Look example code: + C/Util/Lzma/LzmaUtil.c + +When to use: file->file compressing + +1) you must implement callback structures for interfaces: +ISeqInStream +ISeqOutStream +ICompressProgress +ISzAlloc + +static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); } +static void SzFree(void *p, void *address) { p = p; MyFree(address); } +static ISzAlloc g_Alloc = { SzAlloc, SzFree }; + + CFileSeqInStream inStream; + CFileSeqOutStream outStream; + + inStream.funcTable.Read = MyRead; + inStream.file = inFile; + outStream.funcTable.Write = MyWrite; + outStream.file = outFile; + + +2) Create CLzmaEncHandle object; + + CLzmaEncHandle enc; + + enc = LzmaEnc_Create(&g_Alloc); + if (enc == 0) + return SZ_ERROR_MEM; + + +3) initialize CLzmaEncProps properties; + + LzmaEncProps_Init(&props); + + Then you can change some properties in that structure. + +4) Send LZMA properties to LZMA Encoder + + res = LzmaEnc_SetProps(enc, &props); + +5) Write encoded properties to header + + Byte header[LZMA_PROPS_SIZE + 8]; + size_t headerSize = LZMA_PROPS_SIZE; + UInt64 fileSize; + int i; + + res = LzmaEnc_WriteProperties(enc, header, &headerSize); + fileSize = MyGetFileLength(inFile); + for (i = 0; i < 8; i++) + header[headerSize++] = (Byte)(fileSize >> (8 * i)); + MyWriteFileAndCheck(outFile, header, headerSize) + +6) Call encoding function: + res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable, + NULL, &g_Alloc, &g_Alloc); + +7) Destroy LZMA Encoder Object + LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc); + + +If callback function return some error code, LzmaEnc_Encode also returns that code +or it can return the code like SZ_ERROR_READ, SZ_ERROR_WRITE or SZ_ERROR_PROGRESS. + + +Single-call RAM->RAM Compression +-------------------------------- + +Single-call RAM->RAM Compression is similar to Compression with callbacks, +but you provide pointers to buffers instead of pointers to stream callbacks: + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + +Return code: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) + + + +Defines +------- + +_LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code. + +_LZMA_PROB32 - It can increase the speed on some 32-bit CPUs, but memory usage for + some structures will be doubled in that case. + +_LZMA_UINT32_IS_ULONG - Define it if int is 16-bit on your compiler and long is 32-bit. + +_LZMA_NO_SYSTEM_SIZE_T - Define it if you don't want to use size_t type. + + +_7ZIP_PPMD_SUPPPORT - Define it if you don't want to support PPMD method in AMSI-C .7z decoder. + + +C++ LZMA Encoder/Decoder +~~~~~~~~~~~~~~~~~~~~~~~~ +C++ LZMA code use COM-like interfaces. So if you want to use it, +you can study basics of COM/OLE. +C++ LZMA code is just wrapper over ANSI-C code. + + +C++ Notes +~~~~~~~~~~~~~~~~~~~~~~~~ +If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling), +you must check that you correctly work with "new" operator. +7-Zip can be compiled with MSVC 6.0 that doesn't throw "exception" from "new" operator. +So 7-Zip uses "CPP\Common\NewHandler.cpp" that redefines "new" operator: +operator new(size_t size) +{ + void *p = ::malloc(size); + if (p == 0) + throw CNewException(); + return p; +} +If you use MSCV that throws exception for "new" operator, you can compile without +"NewHandler.cpp". So standard exception will be used. Actually some code of +7-Zip catches any exception in internal code and converts it to HRESULT code. +So you don't need to catch CNewException, if you call COM interfaces of 7-Zip. + +--- + +http://www.7-zip.org +http://www.7-zip.org/sdk.html +http://www.7-zip.org/support.html -- cgit v1.2.3