summaryrefslogtreecommitdiffstats
path: root/other-licenses/7zstub/src/Asm/x86/LzmaDecOpt.asm
diff options
context:
space:
mode:
Diffstat (limited to 'other-licenses/7zstub/src/Asm/x86/LzmaDecOpt.asm')
-rw-r--r-- other-licenses/7zstub/src/Asm/x86/LzmaDecOpt.asm 1258
1 files changed, 1258 insertions, 0 deletions
diff --git a/other-licenses/7zstub/src/Asm/x86/LzmaDecOpt.asm b/other-licenses/7zstub/src/Asm/x86/LzmaDecOpt.asm
new file mode 100644
index 0000000000..0a89eb735f
--- /dev/null
+++ b/other-licenses/7zstub/src/Asm/x86/LzmaDecOpt.asm
@@ -0,0 +1,1258 @@
+; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
+; 2018-02-06: Igor Pavlov : Public domain
+;
+; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
+; function for check at link time.
+; That code is tightly coupled with LzmaDec_TryDummy()
+; and with another functions in LzmaDec.c file.
+; CLzmaDec structure, (probs) array layout, input and output of
+; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).
+
+ifndef x64
+; x64=1
+; .err <x64_IS_REQUIRED>
+endif
+
+include 7zAsm.asm
+
+MY_ASM_START
+
+_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
+
+MY_ALIGN macro num:req
+ align num
+endm
+
+MY_ALIGN_16 macro
+ MY_ALIGN 16
+endm
+
+MY_ALIGN_32 macro
+ MY_ALIGN 32
+endm
+
+MY_ALIGN_64 macro
+ MY_ALIGN 64
+endm
+
+
+; _LZMA_SIZE_OPT equ 1
+
+; _LZMA_PROB32 equ 1
+
+ifdef _LZMA_PROB32
+ PSHIFT equ 2
+ PLOAD macro dest, mem
+ mov dest, dword ptr [mem]
+ endm
+ PSTORE macro src, mem
+ mov dword ptr [mem], src
+ endm
+else
+ PSHIFT equ 1
+ PLOAD macro dest, mem
+ movzx dest, word ptr [mem]
+ endm
+ PSTORE macro src, mem
+ mov word ptr [mem], @CatStr(src, _W)
+ endm
+endif
+
+PMULT equ (1 SHL PSHIFT)
+PMULT_HALF equ (1 SHL (PSHIFT - 1))
+PMULT_2 equ (1 SHL (PSHIFT + 1))
+
+
+; x0 range
+; x1 pbPos / (prob) TREE
+; x2 probBranch / prm (MATCHED) / pbPos / cnt
+; x3 sym
+;====== r4 === RSP
+; x5 cod
+; x6 t1 NORM_CALC / probs_state / dist
+; x7 t0 NORM_CALC / prob2 IF_BIT_1
+; x8 state
+; x9 match (MATCHED) / sym2 / dist2 / lpMask_reg
+; x10 kBitModelTotal_reg
+; r11 probs
+; x12 offs (MATCHED) / dic / len_temp
+; x13 processedPos
+; x14 bit (MATCHED) / dicPos
+; r15 buf
+
+
+cod equ x5
+cod_L equ x5_L
+range equ x0
+state equ x8
+state_R equ r8
+buf equ r15
+processedPos equ x13
+kBitModelTotal_reg equ x10
+
+probBranch equ x2
+probBranch_R equ r2
+probBranch_W equ x2_W
+
+pbPos equ x1
+pbPos_R equ r1
+
+cnt equ x2
+cnt_R equ r2
+
+lpMask_reg equ x9
+dicPos equ r14
+
+sym equ x3
+sym_R equ r3
+sym_L equ x3_L
+
+probs equ r11
+dic equ r12
+
+t0 equ x7
+t0_W equ x7_W
+t0_R equ r7
+
+prob2 equ t0
+prob2_W equ t0_W
+
+t1 equ x6
+t1_R equ r6
+
+probs_state equ t1
+probs_state_R equ t1_R
+
+prm equ r2
+match equ x9
+match_R equ r9
+offs equ x12
+offs_R equ r12
+bit equ x14
+bit_R equ r14
+
+sym2 equ x9
+sym2_R equ r9
+
+len_temp equ x12
+
+dist equ sym
+dist2 equ x9
+
+
+
+kNumBitModelTotalBits equ 11
+kBitModelTotal equ (1 SHL kNumBitModelTotalBits)
+kNumMoveBits equ 5
+kBitModelOffset equ ((1 SHL kNumMoveBits) - 1)
+kTopValue equ (1 SHL 24)
+
+NORM_2 macro
+ ; movzx t0, BYTE PTR [buf]
+ shl cod, 8
+ mov cod_L, BYTE PTR [buf]
+ shl range, 8
+ ; or cod, t0
+ inc buf
+endm
+
+
+NORM macro
+ cmp range, kTopValue
+ jae SHORT @F
+ NORM_2
+@@:
+endm
+
+
+; ---------- Branch MACROS ----------
+
+UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req
+ mov prob2, kBitModelTotal_reg
+ sub prob2, probBranch
+ shr prob2, kNumMoveBits
+ add probBranch, prob2
+ PSTORE probBranch, probOffset * 1 + probsArray + probDisp * PMULT
+endm
+
+
+UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req
+ sub prob2, range
+ sub cod, range
+ mov range, prob2
+ mov prob2, probBranch
+ shr probBranch, kNumMoveBits
+ sub prob2, probBranch
+ PSTORE prob2, probOffset * 1 + probsArray + probDisp * PMULT
+endm
+
+
+CMP_COD macro probsArray:req, probOffset:req, probDisp:req
+ PLOAD probBranch, probOffset * 1 + probsArray + probDisp * PMULT
+ NORM
+ mov prob2, range
+ shr range, kNumBitModelTotalBits
+ imul range, probBranch
+ cmp cod, range
+endm
+
+
+IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
+ CMP_COD probsArray, probOffset, probDisp
+ jae toLabel
+endm
+
+
+IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
+ IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel
+ UPDATE_0 probsArray, probOffset, probDisp
+endm
+
+
+IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
+ CMP_COD probsArray, probOffset, probDisp
+ jb toLabel
+endm
+
+
+; ---------- CMOV MACROS ----------
+
+NORM_CALC macro prob:req
+ NORM
+ mov t0, range
+ shr range, kNumBitModelTotalBits
+ imul range, prob
+ sub t0, range
+ mov t1, cod
+ sub cod, range
+endm
+
+
+PUP macro prob:req, probPtr:req
+ sub t0, prob
+ ; only sar works for both 16/32 bit prob modes
+ sar t0, kNumMoveBits
+ add t0, prob
+ PSTORE t0, probPtr
+endm
+
+
+PUP_SUB macro prob:req, probPtr:req, symSub:req
+ sbb sym, symSub
+ PUP prob, probPtr
+endm
+
+
+PUP_COD macro prob:req, probPtr:req, symSub:req
+ mov t0, kBitModelOffset
+ cmovb cod, t1
+ mov t1, sym
+ cmovb t0, kBitModelTotal_reg
+ PUP_SUB prob, probPtr, symSub
+endm
+
+
+BIT_0 macro prob:req, probNext:req
+ PLOAD prob, probs + 1 * PMULT
+ PLOAD probNext, probs + 1 * PMULT_2
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ PLOAD t0, probs + 1 * PMULT_2 + PMULT
+ cmovae probNext, t0
+ mov t0, kBitModelOffset
+ cmovb cod, t1
+ cmovb t0, kBitModelTotal_reg
+ mov sym, 2
+ PUP_SUB prob, probs + 1 * PMULT, 0 - 1
+endm
+
+
+BIT_1 macro prob:req, probNext:req
+ PLOAD probNext, probs + sym_R * PMULT_2
+ add sym, sym
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ PLOAD t0, probs + sym_R * PMULT + PMULT
+ cmovae probNext, t0
+ PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1
+endm
+
+
+BIT_2 macro prob:req, symSub:req
+ add sym, sym
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ PUP_COD prob, probs + t1_R * PMULT_HALF, symSub
+endm
+
+
+; ---------- MATCHED LITERAL ----------
+
+LITM_0 macro
+ mov offs, 256 * PMULT
+ shl match, (PSHIFT + 1)
+ mov bit, offs
+ and bit, match
+ PLOAD x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT
+ lea prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]
+ ; lea prm, [probs + 256 * PMULT + 1 * PMULT]
+ ; add prm, bit_R
+ xor offs, bit
+ add match, match
+
+ NORM_CALC x1
+
+ cmovae offs, bit
+ mov bit, match
+ cmovae range, t0
+ mov t0, kBitModelOffset
+ cmovb cod, t1
+ cmovb t0, kBitModelTotal_reg
+ mov sym, 0
+ PUP_SUB x1, prm, -2-1
+endm
+
+
+LITM macro
+ and bit, offs
+ lea prm, [probs + offs_R * 1]
+ add prm, bit_R
+ PLOAD x1, prm + sym_R * PMULT
+ xor offs, bit
+ add sym, sym
+ add match, match
+
+ NORM_CALC x1
+
+ cmovae offs, bit
+ mov bit, match
+ cmovae range, t0
+ PUP_COD x1, prm + t1_R * PMULT_HALF, - 1
+endm
+
+
+LITM_2 macro
+ and bit, offs
+ lea prm, [probs + offs_R * 1]
+ add prm, bit_R
+ PLOAD x1, prm + sym_R * PMULT
+ add sym, sym
+
+ NORM_CALC x1
+
+ cmovae range, t0
+ PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1
+endm
+
+
+; ---------- REVERSE BITS ----------
+
+REV_0 macro prob:req, probNext:req
+ ; PLOAD prob, probs + 1 * PMULT
+ ; lea sym2_R, [probs + 2 * PMULT]
+ ; PLOAD probNext, probs + 2 * PMULT
+ PLOAD probNext, sym2_R
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ PLOAD t0, probs + 3 * PMULT
+ cmovae probNext, t0
+ cmovb cod, t1
+ mov t0, kBitModelOffset
+ cmovb t0, kBitModelTotal_reg
+ lea t1_R, [probs + 3 * PMULT]
+ cmovae sym2_R, t1_R
+ PUP prob, probs + 1 * PMULT
+endm
+
+
+REV_1 macro prob:req, probNext:req, step:req
+ add sym2_R, step * PMULT
+ PLOAD probNext, sym2_R
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ PLOAD t0, sym2_R + step * PMULT
+ cmovae probNext, t0
+ cmovb cod, t1
+ mov t0, kBitModelOffset
+ cmovb t0, kBitModelTotal_reg
+ lea t1_R, [sym2_R + step * PMULT]
+ cmovae sym2_R, t1_R
+ PUP prob, t1_R - step * PMULT_2
+endm
+
+
+REV_2 macro prob:req, step:req
+ sub sym2_R, probs
+ shr sym2, PSHIFT
+ or sym, sym2
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ lea t0, [sym - step]
+ cmovb sym, t0
+ cmovb cod, t1
+ mov t0, kBitModelOffset
+ cmovb t0, kBitModelTotal_reg
+ PUP prob, probs + sym2_R * PMULT
+endm
+
+
+REV_1_VAR macro prob:req
+ PLOAD prob, sym_R
+ mov probs, sym_R
+ add sym_R, sym2_R
+
+ NORM_CALC prob
+
+ cmovae range, t0
+ lea t0_R, [sym_R + sym2_R]
+ cmovae sym_R, t0_R
+ mov t0, kBitModelOffset
+ cmovb cod, t1
+ ; mov t1, kBitModelTotal
+ ; cmovb t0, t1
+ cmovb t0, kBitModelTotal_reg
+ add sym2, sym2
+ PUP prob, probs
+endm
+
+
+
+
+LIT_PROBS macro lpMaskParam:req
+ ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
+ mov t0, processedPos
+ shl t0, 8
+ add sym, t0
+ and sym, lpMaskParam
+ add probs_state_R, pbPos_R
+ mov x1, LOC lc2
+ lea sym, dword ptr[sym_R + 2 * sym_R]
+ add probs, Literal * PMULT
+ shl sym, x1_L
+ add probs, sym_R
+ UPDATE_0 probs_state_R, 0, IsMatch
+ inc processedPos
+endm
+
+
+
+kNumPosBitsMax equ 4
+kNumPosStatesMax equ (1 SHL kNumPosBitsMax)
+
+kLenNumLowBits equ 3
+kLenNumLowSymbols equ (1 SHL kLenNumLowBits)
+kLenNumHighBits equ 8
+kLenNumHighSymbols equ (1 SHL kLenNumHighBits)
+kNumLenProbs equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)
+
+LenLow equ 0
+LenChoice equ LenLow
+LenChoice2 equ (LenLow + kLenNumLowSymbols)
+LenHigh equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)
+
+kNumStates equ 12
+kNumStates2 equ 16
+kNumLitStates equ 7
+
+kStartPosModelIndex equ 4
+kEndPosModelIndex equ 14
+kNumFullDistances equ (1 SHL (kEndPosModelIndex SHR 1))
+
+kNumPosSlotBits equ 6
+kNumLenToPosStates equ 4
+
+kNumAlignBits equ 4
+kAlignTableSize equ (1 SHL kNumAlignBits)
+
+kMatchMinLen equ 2
+kMatchSpecLenStart equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+
+kStartOffset equ 1664
+SpecPos equ (-kStartOffset)
+IsRep0Long equ (SpecPos + kNumFullDistances)
+RepLenCoder equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))
+LenCoder equ (RepLenCoder + kNumLenProbs)
+IsMatch equ (LenCoder + kNumLenProbs)
+kAlign equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))
+IsRep equ (kAlign + kAlignTableSize)
+IsRepG0 equ (IsRep + kNumStates)
+IsRepG1 equ (IsRepG0 + kNumStates)
+IsRepG2 equ (IsRepG1 + kNumStates)
+PosSlot equ (IsRepG2 + kNumStates)
+Literal equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))
+NUM_BASE_PROBS equ (Literal + kStartOffset)
+
+if kAlign ne 0
+ .err <Stop_Compiling_Bad_LZMA_kAlign>
+endif
+
+if NUM_BASE_PROBS ne 1984
+ .err <Stop_Compiling_Bad_LZMA_PROBS>
+endif
+
+
+PTR_FIELD equ dq ?
+
+CLzmaDec_Asm struct
+ lc db ?
+ lp db ?
+ pb db ?
+ _pad_ db ?
+ dicSize dd ?
+
+ probs_Spec PTR_FIELD
+ probs_1664 PTR_FIELD
+ dic_Spec PTR_FIELD
+ dicBufSize PTR_FIELD
+ dicPos_Spec PTR_FIELD
+ buf_Spec PTR_FIELD
+
+ range_Spec dd ?
+ code_Spec dd ?
+ processedPos_Spec dd ?
+ checkDicSize dd ?
+ rep0 dd ?
+ rep1 dd ?
+ rep2 dd ?
+ rep3 dd ?
+ state_Spec dd ?
+ remainLen dd ?
+CLzmaDec_Asm ends
+
+
+CLzmaDec_Asm_Loc struct
+ OLD_RSP PTR_FIELD
+ lzmaPtr PTR_FIELD
+ _pad0_ PTR_FIELD
+ _pad1_ PTR_FIELD
+ _pad2_ PTR_FIELD
+ dicBufSize PTR_FIELD
+ probs_Spec PTR_FIELD
+ dic_Spec PTR_FIELD
+
+ limit PTR_FIELD
+ bufLimit PTR_FIELD
+ lc2 dd ?
+ lpMask dd ?
+ pbMask dd ?
+ checkDicSize dd ?
+
+ _pad_ dd ?
+ remainLen dd ?
+ dicPos_Spec PTR_FIELD
+ rep0 dd ?
+ rep1 dd ?
+ rep2 dd ?
+ rep3 dd ?
+CLzmaDec_Asm_Loc ends
+
+
+GLOB_2 equ [sym_R].CLzmaDec_Asm.
+GLOB equ [r1].CLzmaDec_Asm.
+LOC_0 equ [r0].CLzmaDec_Asm_Loc.
+LOC equ [RSP].CLzmaDec_Asm_Loc.
+
+
+COPY_VAR macro name
+ mov t0, GLOB_2 name
+ mov LOC_0 name, t0
+endm
+
+
+RESTORE_VAR macro name
+ mov t0, LOC name
+ mov GLOB name, t0
+endm
+
+
+
+IsMatchBranch_Pre macro reg
+ ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
+ mov pbPos, LOC pbMask
+ and pbPos, processedPos
+ shl pbPos, (kLenNumLowBits + 1 + PSHIFT)
+ lea probs_state_R, [probs + state_R]
+endm
+
+
+IsMatchBranch macro reg
+ IsMatchBranch_Pre
+ IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label
+endm
+
+
+CheckLimits macro reg
+ cmp buf, LOC bufLimit
+ jae fin_OK
+ cmp dicPos, LOC limit
+ jae fin_OK
+endm
+
+
+
+; RSP is (16x + 8) bytes aligned in WIN64-x64
+; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)
+
+PARAM_lzma equ REG_PARAM_0
+PARAM_limit equ REG_PARAM_1
+PARAM_bufLimit equ REG_PARAM_2
+
+; MY_ALIGN_64
+MY_PROC LzmaDec_DecodeReal_3, 3
+MY_PUSH_PRESERVED_REGS
+
+ lea r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]
+ and r0, -128
+ mov r5, RSP
+ mov RSP, r0
+ mov LOC_0 Old_RSP, r5
+ mov LOC_0 lzmaPtr, PARAM_lzma
+
+ mov LOC_0 remainLen, 0 ; remainLen must be ZERO
+
+ mov LOC_0 bufLimit, PARAM_bufLimit
+ mov sym_R, PARAM_lzma ; CLzmaDec_Asm_Loc pointer for GLOB_2
+ mov dic, GLOB_2 dic_Spec
+ add PARAM_limit, dic
+ mov LOC_0 limit, PARAM_limit
+
+ COPY_VAR(rep0)
+ COPY_VAR(rep1)
+ COPY_VAR(rep2)
+ COPY_VAR(rep3)
+
+ mov dicPos, GLOB_2 dicPos_Spec
+ add dicPos, dic
+ mov LOC_0 dicPos_Spec, dicPos
+ mov LOC_0 dic_Spec, dic
+
+ mov x1_L, GLOB_2 pb
+ mov t0, 1
+ shl t0, x1_L
+ dec t0
+ mov LOC_0 pbMask, t0
+
+ ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+ ; unsigned lc = p->prop.lc;
+ ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
+
+ mov x1_L, GLOB_2 lc
+ mov x2, 100h
+ mov t0, x2
+ shr x2, x1_L
+ ; inc x1
+ add x1_L, PSHIFT
+ mov LOC_0 lc2, x1
+ mov x1_L, GLOB_2 lp
+ shl t0, x1_L
+ sub t0, x2
+ mov LOC_0 lpMask, t0
+ mov lpMask_reg, t0
+
+ ; mov probs, GLOB_2 probs_Spec
+ ; add probs, kStartOffset SHL PSHIFT
+ mov probs, GLOB_2 probs_1664
+ mov LOC_0 probs_Spec, probs
+
+ mov t0_R, GLOB_2 dicBufSize
+ mov LOC_0 dicBufSize, t0_R
+
+ mov x1, GLOB_2 checkDicSize
+ mov LOC_0 checkDicSize, x1
+
+ mov processedPos, GLOB_2 processedPos_Spec
+
+ mov state, GLOB_2 state_Spec
+ shl state, PSHIFT
+
+ mov buf, GLOB_2 buf_Spec
+ mov range, GLOB_2 range_Spec
+ mov cod, GLOB_2 code_Spec
+ mov kBitModelTotal_reg, kBitModelTotal
+ xor sym, sym
+
+ ; if (processedPos != 0 || checkDicSize != 0)
+ or x1, processedPos
+ jz @f
+
+ add t0_R, dic
+ cmp dicPos, dic
+ cmovnz t0_R, dicPos
+ movzx sym, byte ptr[t0_R - 1]
+
+@@:
+ IsMatchBranch_Pre
+ cmp state, 4 * PMULT
+ jb lit_end
+ cmp state, kNumLitStates * PMULT
+ jb lit_matched_end
+ jmp lz_end
+
+
+
+
+; ---------- LITERAL ----------
+MY_ALIGN_64
+lit_start:
+ xor state, state
+lit_start_2:
+ LIT_PROBS lpMask_reg
+
+ ifdef _LZMA_SIZE_OPT
+
+ PLOAD x1, probs + 1 * PMULT
+ mov sym, 1
+MY_ALIGN_16
+lit_loop:
+ BIT_1 x1, x2
+ mov x1, x2
+ cmp sym, 127
+ jbe lit_loop
+
+ else
+
+ BIT_0 x1, x2
+ BIT_1 x2, x1
+ BIT_1 x1, x2
+ BIT_1 x2, x1
+ BIT_1 x1, x2
+ BIT_1 x2, x1
+ BIT_1 x1, x2
+
+ endif
+
+ BIT_2 x2, 256 - 1
+
+ ; mov dic, LOC dic_Spec
+ mov probs, LOC probs_Spec
+ IsMatchBranch_Pre
+ mov byte ptr[dicPos], sym_L
+ inc dicPos
+
+ CheckLimits
+lit_end:
+ IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start
+
+ ; jmp IsMatch_label
+
+; ---------- MATCHES ----------
+; MY_ALIGN_32
+IsMatch_label:
+ UPDATE_1 probs_state_R, pbPos_R, IsMatch
+ IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label
+
+ add probs, LenCoder * PMULT
+ add state, kNumStates * PMULT
+
+; ---------- LEN DECODE ----------
+len_decode:
+ mov len_temp, 8 - 1 - kMatchMinLen
+ IF_BIT_0_NOUP probs, 0, 0, len_mid_0
+ UPDATE_1 probs, 0, 0
+ add probs, (1 SHL (kLenNumLowBits + PSHIFT))
+ mov len_temp, -1 - kMatchMinLen
+ IF_BIT_0_NOUP probs, 0, 0, len_mid_0
+ UPDATE_1 probs, 0, 0
+ add probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))
+ mov sym, 1
+ PLOAD x1, probs + 1 * PMULT
+
+MY_ALIGN_32
+len8_loop:
+ BIT_1 x1, x2
+ mov x1, x2
+ cmp sym, 64
+ jb len8_loop
+
+ mov len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen
+ jmp len_mid_2
+
+MY_ALIGN_32
+len_mid_0:
+ UPDATE_0 probs, 0, 0
+ add probs, pbPos_R
+ BIT_0 x2, x1
+len_mid_2:
+ BIT_1 x1, x2
+ BIT_2 x2, len_temp
+ mov probs, LOC probs_Spec
+ cmp state, kNumStates * PMULT
+ jb copy_match
+
+
+; ---------- DECODE DISTANCE ----------
+ ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+
+ mov t0, 3 + kMatchMinLen
+ cmp sym, 3 + kMatchMinLen
+ cmovb t0, sym
+ add probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))
+ shl t0, (kNumPosSlotBits + PSHIFT)
+ add probs, t0_R
+
+ ; sym = Len
+ ; mov LOC remainLen, sym
+ mov len_temp, sym
+
+ ifdef _LZMA_SIZE_OPT
+
+ PLOAD x1, probs + 1 * PMULT
+ mov sym, 1
+MY_ALIGN_16
+slot_loop:
+ BIT_1 x1, x2
+ mov x1, x2
+ cmp sym, 32
+ jb slot_loop
+
+ else
+
+ BIT_0 x1, x2
+ BIT_1 x2, x1
+ BIT_1 x1, x2
+ BIT_1 x2, x1
+ BIT_1 x1, x2
+
+ endif
+
+ mov x1, sym
+ BIT_2 x2, 64-1
+
+ and sym, 3
+ mov probs, LOC probs_Spec
+ cmp x1, 32 + kEndPosModelIndex / 2
+ jb short_dist
+
+ ; unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
+ sub x1, (32 + 1 + kNumAlignBits)
+ ; distance = (2 | (distance & 1));
+ or sym, 2
+ PLOAD x2, probs + 1 * PMULT
+ shl sym, kNumAlignBits + 1
+ lea sym2_R, [probs + 2 * PMULT]
+
+ jmp direct_norm
+ ; lea t1, [sym_R + (1 SHL kNumAlignBits)]
+ ; cmp range, kTopValue
+ ; jb direct_norm
+
+; ---------- DIRECT DISTANCE ----------
+MY_ALIGN_32
+direct_loop:
+ shr range, 1
+ mov t0, cod
+ sub cod, range
+ cmovs cod, t0
+ cmovns sym, t1
+
+ comment ~
+ sub cod, range
+ mov x2, cod
+ sar x2, 31
+ lea sym, dword ptr [r2 + sym_R * 2 + 1]
+ and x2, range
+ add cod, x2
+ ~
+ dec x1
+ je direct_end
+
+ add sym, sym
+direct_norm:
+ lea t1, [sym_R + (1 SHL kNumAlignBits)]
+ cmp range, kTopValue
+ jae near ptr direct_loop
+ ; we align for 32 here with "near ptr" command above
+ NORM_2
+ jmp direct_loop
+
+MY_ALIGN_32
+direct_end:
+ ; prob = + kAlign;
+ ; distance <<= kNumAlignBits;
+ REV_0 x2, x1
+ REV_1 x1, x2, 2
+ REV_1 x2, x1, 4
+ REV_2 x1, 8
+
+decode_dist_end:
+
+ ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
+
+ mov t0, LOC checkDicSize
+ test t0, t0
+ cmove t0, processedPos
+ cmp sym, t0
+ jae end_of_payload
+
+ ; rep3 = rep2;
+ ; rep2 = rep1;
+ ; rep1 = rep0;
+ ; rep0 = distance + 1;
+
+ inc sym
+ mov t0, LOC rep0
+ mov t1, LOC rep1
+ mov x1, LOC rep2
+ mov LOC rep0, sym
+ ; mov sym, LOC remainLen
+ mov sym, len_temp
+ mov LOC rep1, t0
+ mov LOC rep2, t1
+ mov LOC rep3, x1
+
+ ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+ cmp state, (kNumStates + kNumLitStates) * PMULT
+ mov state, kNumLitStates * PMULT
+ mov t0, (kNumLitStates + 3) * PMULT
+ cmovae state, t0
+
+
+; ---------- COPY MATCH ----------
+copy_match:
+
+ ; len += kMatchMinLen;
+ ; add sym, kMatchMinLen
+
+ ; if ((rem = limit - dicPos) == 0)
+ ; {
+ ; p->dicPos = dicPos;
+ ; return SZ_ERROR_DATA;
+ ; }
+ mov cnt_R, LOC limit
+ sub cnt_R, dicPos
+ jz fin_ERROR
+
+ ; curLen = ((rem < len) ? (unsigned)rem : len);
+ cmp cnt_R, sym_R
+ ; cmovae cnt_R, sym_R ; 64-bit
+ cmovae cnt, sym ; 32-bit
+
+ mov dic, LOC dic_Spec
+ mov x1, LOC rep0
+
+ mov t0_R, dicPos
+ add dicPos, cnt_R
+ ; processedPos += curLen;
+ add processedPos, cnt
+ ; len -= curLen;
+ sub sym, cnt
+ mov LOC remainLen, sym
+
+ sub t0_R, dic
+
+ ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
+ sub t0_R, r1
+ jae @f
+
+ mov r1, LOC dicBufSize
+ add t0_R, r1
+ sub r1, t0_R
+ cmp cnt_R, r1
+ ja copy_match_cross
+@@:
+ ; if (curLen <= dicBufSize - pos)
+
+; ---------- COPY MATCH FAST ----------
+ ; Byte *dest = dic + dicPos;
+ ; mov r1, dic
+ ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+ ; sub t0_R, dicPos
+ ; dicPos += curLen;
+
+ ; const Byte *lim = dest + curLen;
+ add t0_R, dic
+ movzx sym, byte ptr[t0_R]
+ add t0_R, cnt_R
+ neg cnt_R
+ ; lea r1, [dicPos - 1]
+copy_common:
+ dec dicPos
+ ; cmp LOC rep0, 1
+ ; je rep0Label
+
+ ; t0_R - src_lim
+ ; r1 - dest_lim - 1
+ ; cnt_R - (-cnt)
+
+ IsMatchBranch_Pre
+ inc cnt_R
+ jz copy_end
+MY_ALIGN_16
+@@:
+ mov byte ptr[cnt_R * 1 + dicPos], sym_L
+ movzx sym, byte ptr[cnt_R * 1 + t0_R]
+ inc cnt_R
+ jnz @b
+
+copy_end:
+lz_end_match:
+ mov byte ptr[dicPos], sym_L
+ inc dicPos
+
+ ; IsMatchBranch_Pre
+ CheckLimits
+lz_end:
+ IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
+
+
+
+; ---------- LITERAL MATCHED ----------
+
+ LIT_PROBS LOC lpMask
+
+ ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ mov x1, LOC rep0
+ ; mov dic, LOC dic_Spec
+ mov LOC dicPos_Spec, dicPos
+
+ ; state -= (state < 10) ? 3 : 6;
+ lea t0, [state_R - 6 * PMULT]
+ sub state, 3 * PMULT
+ cmp state, 7 * PMULT
+ cmovae state, t0
+
+ sub dicPos, dic
+ sub dicPos, r1
+ jae @f
+ add dicPos, LOC dicBufSize
+@@:
+ comment ~
+ xor t0, t0
+ sub dicPos, r1
+ cmovb t0_R, LOC dicBufSize
+ ~
+
+ movzx match, byte ptr[dic + dicPos * 1]
+
+ ifdef _LZMA_SIZE_OPT
+
+ mov offs, 256 * PMULT
+ shl match, (PSHIFT + 1)
+ mov bit, match
+ mov sym, 1
+MY_ALIGN_16
+litm_loop:
+ LITM
+ cmp sym, 256
+ jb litm_loop
+ sub sym, 256
+
+ else
+
+ LITM_0
+ LITM
+ LITM
+ LITM
+ LITM
+ LITM
+ LITM
+ LITM_2
+
+ endif
+
+ mov probs, LOC probs_Spec
+ IsMatchBranch_Pre
+ ; mov dic, LOC dic_Spec
+ mov dicPos, LOC dicPos_Spec
+ mov byte ptr[dicPos], sym_L
+ inc dicPos
+
+ CheckLimits
+lit_matched_end:
+ IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
+ ; IsMatchBranch
+ mov lpMask_reg, LOC lpMask
+ sub state, 3 * PMULT
+ jmp lit_start_2
+
+
+
+; ---------- REP 0 LITERAL ----------
+MY_ALIGN_32
+IsRep0Short_label:
+ UPDATE_0 probs_state_R, pbPos_R, IsRep0Long
+
+ ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+ mov dic, LOC dic_Spec
+ mov t0_R, dicPos
+ mov probBranch, LOC rep0
+ sub t0_R, dic
+
+ sub probs, RepLenCoder * PMULT
+ inc processedPos
+ ; state = state < kNumLitStates ? 9 : 11;
+ or state, 1 * PMULT
+ IsMatchBranch_Pre
+
+ sub t0_R, probBranch_R
+ jae @f
+ add t0_R, LOC dicBufSize
+@@:
+ movzx sym, byte ptr[dic + t0_R * 1]
+ jmp lz_end_match
+
+
+MY_ALIGN_32
+IsRep_label:
+ UPDATE_1 probs_state_R, 0, IsRep
+
+ ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
+ ; So we don't check it here.
+
+ ; mov t0, processedPos
+ ; or t0, LOC checkDicSize
+ ; jz fin_ERROR_2
+
+ ; state = state < kNumLitStates ? 8 : 11;
+ cmp state, kNumLitStates * PMULT
+ mov state, 8 * PMULT
+ mov probBranch, 11 * PMULT
+ cmovae state, probBranch
+
+ ; prob = probs + RepLenCoder;
+ add probs, RepLenCoder * PMULT
+
+ IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label
+ IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label
+ UPDATE_1 probs_state_R, pbPos_R, IsRep0Long
+ jmp len_decode
+
+MY_ALIGN_32
+IsRepG0_label:
+ UPDATE_1 probs_state_R, 0, IsRepG0
+ mov dist2, LOC rep0
+ mov dist, LOC rep1
+ mov LOC rep1, dist2
+
+ IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label
+ mov LOC rep0, dist
+ jmp len_decode
+
+; MY_ALIGN_32
+IsRepG1_label:
+ UPDATE_1 probs_state_R, 0, IsRepG1
+ mov dist2, LOC rep2
+ mov LOC rep2, dist
+
+ IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label
+ mov LOC rep0, dist2
+ jmp len_decode
+
+; MY_ALIGN_32
+IsRepG2_label:
+ UPDATE_1 probs_state_R, 0, IsRepG2
+ mov dist, LOC rep3
+ mov LOC rep3, dist2
+ mov LOC rep0, dist
+ jmp len_decode
+
+
+
+; ---------- SPEC SHORT DISTANCE ----------
+
+MY_ALIGN_32
+short_dist:
+ sub x1, 32 + 1
+ jbe decode_dist_end
+ or sym, 2
+ shl sym, x1_L
+ lea sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]
+ mov sym2, PMULT ; step
+MY_ALIGN_32
+spec_loop:
+ REV_1_VAR x2
+ dec x1
+ jnz spec_loop
+
+ mov probs, LOC probs_Spec
+ sub sym, sym2
+ sub sym, SpecPos * PMULT
+ sub sym_R, probs
+ shr sym, PSHIFT
+
+ jmp decode_dist_end
+
+
+; ---------- COPY MATCH CROSS ----------
+copy_match_cross:
+ ; t0_R - src pos
+ ; r1 - len to dicBufSize
+ ; cnt_R - total copy len
+
+ mov t1_R, t0_R ; srcPos
+ mov t0_R, dic
+ mov r1, LOC dicBufSize ;
+ neg cnt_R
+@@:
+ movzx sym, byte ptr[t1_R * 1 + t0_R]
+ inc t1_R
+ mov byte ptr[cnt_R * 1 + dicPos], sym_L
+ inc cnt_R
+ cmp t1_R, r1
+ jne @b
+
+ movzx sym, byte ptr[t0_R]
+ sub t0_R, cnt_R
+ jmp copy_common
+
+
+
+
+fin_ERROR:
+ mov LOC remainLen, len_temp
+; fin_ERROR_2:
+ mov sym, 1
+ jmp fin
+
+end_of_payload:
+ cmp sym, 0FFFFFFFFh ; -1
+ jne fin_ERROR
+
+ mov LOC remainLen, kMatchSpecLenStart
+ sub state, kNumStates * PMULT
+
+fin_OK:
+ xor sym, sym
+
+fin:
+ NORM
+
+ mov r1, LOC lzmaPtr
+
+ sub dicPos, LOC dic_Spec
+ mov GLOB dicPos_Spec, dicPos
+ mov GLOB buf_Spec, buf
+ mov GLOB range_Spec, range
+ mov GLOB code_Spec, cod
+ shr state, PSHIFT
+ mov GLOB state_Spec, state
+ mov GLOB processedPos_Spec, processedPos
+
+ RESTORE_VAR(remainLen)
+ RESTORE_VAR(rep0)
+ RESTORE_VAR(rep1)
+ RESTORE_VAR(rep2)
+ RESTORE_VAR(rep3)
+
+ mov x0, sym
+
+ mov RSP, LOC Old_RSP
+
+MY_POP_PRESERVED_REGS
+MY_ENDP
+
+_TEXT$LZMADECOPT ENDS
+
+end