From 37747043267fe86133228ac1c2375b7dd574c559 Mon Sep 17 00:00:00 2001 From: "jreiser@BitWagon.com" Date: Tue, 13 Sep 2016 20:14:44 -0700 Subject: [PATCH] forgotten file; 3 FIXME ! added src/stub/src/arch/arm/v8a/lzma_d-arm.S --- src/stub/src/arch/arm/v8a/lzma_d-arm.S | 525 +++++++++++++++++++++++++ 1 file changed, 525 insertions(+) create mode 100644 src/stub/src/arch/arm/v8a/lzma_d-arm.S diff --git a/src/stub/src/arch/arm/v8a/lzma_d-arm.S b/src/stub/src/arch/arm/v8a/lzma_d-arm.S new file mode 100644 index 00000000..39f37862 --- /dev/null +++ b/src/stub/src/arch/arm/v8a/lzma_d-arm.S @@ -0,0 +1,525 @@ +// In LZMA SDK 4.63 file lzma.txt (2008-12-30): +// LZMA SDK is written and placed in the public domain by Igor Pavlov. +// The creative expression of this hand compilation into assembly language, +// including (but not limited to) code organization and register assignment, +// remains copyright by John F. Reiser and licensed under GNU Lesser General +// Public License (GNU LGPL). + +// Hand compiled Copyright (c) 2006-2015 John F. Reiser (2007-06-18) +// from modified LzmaDecode.c. +// LZMA SDK 4.40 Copyright (c) 1999-2015 Igor Pavlov (2006-05-01) +// +// This file is licensed under either of these two licenses: +// 1) GNU Lesser General Public License (GNU LGPL) +// 2) Common Public License (CPL) +// See files LGPL.txt and CPL.html for the text of the licenses. 
+ +#include "macros.S" + +kLzmaStreamWasFinishedId= (-1) + +kNumTopBits= 24 +kTopValue= 1<> 1)) + +kNumPosSlotBits= 6 +kNumLenToPosStates= 4 + +kNumAlignBits= 4 +kAlignTableSize= (1 << kNumAlignBits) + +kMatchMinLen= 2 + +IsMatch= 0 +IsRep= (IsMatch + (kNumStates << kNumPosBitsMax)) +IsRepG0= (IsRep + kNumStates) +IsRepG1= (IsRepG0 + kNumStates) +IsRepG2= (IsRepG1 + kNumStates) +IsRep0Long= (IsRepG2 + kNumStates) +PosSlot= (IsRep0Long + (kNumStates << kNumPosBitsMax)) +SpecPos= (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +Align= (SpecPos + kNumFullDistances - kEndPosModelIndex) + + LenCoder= (Align + kAlignTableSize) +RepLenCoder= (LenCoder + kNumLenProbs) +Literal= (RepLenCoder + kNumLenProbs) + +LZMA_BASE_SIZE= Literal /* 1846 */ +LZMA_LIT_SIZE= 768 + +#define call bl /* subroutine call */ + +#define symbol mi +#define mo mi + +#define mi w0 +#define p_in x1 +#define t1 w2 +#define t1x x2 +#define t0 w3 +#define t0x x3 + +#define bit w4 +#define bitx x4 +#define i w5 +#define ix x5 +#define state w6 +#define Range w7 + +#define inPtr x8 +#define outPtr x9 +#define prob x10 +#define p x11 + +#define Code w12 +#define inLim x13 +#define outLim x14 + +#define rep0 w15 + +#define t2x x17 + +#define nowPos bit /* temporary only */ +#define nowPosx bitx /* temporary only */ +#define distance bit +#define numDirectBits bit +#define hibit bit +#define matchB i +#define probLen ix +#define probLit ix +#define posSlot posState +#define r_posSlot t0 + +#define r_len i + +#define vs x0 +vs_ps= 2 +vs_lp= 1 +vs_lc= 0 + +#if !defined(PARAMETER_STYLE) /*{*/ + #define PARAMETER_STYLE 1 +// Possible choices: +// 1 /* 0 bytes; 1-byte encoding of pb,lp,lc [default] */ +// 2 /* -24 bytes; 2-byte encoding requires no division */ +// 3 /* -32 bytes; separate bytes lc,lp,pb,xx at -4+ probs */ +#endif /*}*/ + +/* LzmaDecode(x0=vs, x1=inStream, w2=inSize, x3= &inSizeProcessed, + x4= outStream, w5= outSize, x6= &outSizeProcessed) +*/ +LzmaDecode: .globl LzmaDecode + // save 
&inSizeProcesesed and caller registers + PUSH5(x3,x4,x5,x6, x30) + mov inPtr,x1 + add inLim,x1,w2, uxtw + mov outPtr,x4 + add outLim,x4,w5, uxtw + +#if 1==PARAMETER_STYLE /*{ [0]: pb*45 + lp*9 + lc */ + mov p,vs + ldrb w6,[inPtr],#1 + + mov w0,#45; udiv w4,w6,w0 // w4 = quo(w6, 45) = pb + mul w0,w0,w4; sub w6,w6,w0 // w6 = rem(w6, 45) + + mov w0,#9; udiv w5,w6,w0 // w5 = quo(w6, 9) = lp + mul w0,w0,w5; sub w6,w6,w0 // w6 = rem(w6, 9) = lc +#endif /*}*/ + +#if 2==PARAMETER_STYLE /*{ [0]: ((lc + lp)<<3) | pb; [1]: (lp<<4) | lc */ + mov p,vs + ldrb w4,[inPtr],#1; and w4,w4,#7 // pb + ldrb w6,[inPtr],#1; mov w5,w6,lsr #4 // lp + and w6,w6,#0xf // lc + +#endif /*}*/ + +#if 3==PARAMETER_STYLE /*{ lc,lp,pb,xx in separate bytes before probs[] */ + add p,vs,#4 + ldrb w6,[vs, #vs_lc] + ldrb w5,[vs, #vs_lp] + ldrb w4,[vs, #vs_ps] +#endif /*}*/ +#undef vs + + add w14,w5,w6 // lp + lc + mov Range,#~0 + lsl w5,Range,w5; mvn w5,w5 // ~(~0<>1) // 0.5 starting probability + mov x1,p + orr x0,x0,x0,lsl #16 + orr x0,x0,x0,lsl #32 +L10: + str x0,[x1],#4*2 // 4 at a time + subs w2,w2,#4; bgt L10 + + add x0,inPtr,#5 // sentinel +L14: + call rcInit2; cmp x0,inPtr; bne L14 + +L200: // main loop + ldr t0x,outBuf + ldr t1,posStateMask + sub nowPosx,outPtr,t0x + and mi,nowPos,t1 + str mi,posState + add mi,mi,state, lsl #kNumPosBitsMax + add p_in,p,#IsMatch<<1 + call rcGetBit_mi0; bne L270 + + ldr t0,litPosMask + ldr t1,lc + and t0,t0,nowPos + ldrb mi,prevB + lsl t0,t0,t1 // (nowPos & litPosMask)<> (8- lc) + add t0,t0,t1 + add prob, p,#Literal<<1 + add t0,t0,t0,lsl #1 // *3 + uxtw t0x,t0 + mov symbol,#1 + add prob,prob,t0x,lsl #1+ 8 // *768 *2 + + cmp state,#kNumLitStates; blo L240 +L205: + sxtw t0x,rep0 + neg t0x,t0x + ldrb matchB,[outPtr,t0x] +L210: // symbol === mi === mo + lsl matchB,matchB,#1 + add p_in,prob,#0x100<<1 + and bit,matchB,#0x100 + uxtw t0x,bit + add p_in,p_in,t0x,lsl #1 + call rcGetBit_mi + and t0,symbol,#1 + cmp t0,bit,lsr #8; bne L243 // break + cmp symbol,#0x100; blo L210 
+ b L245 +L240: // symbol === mi === mo + mov p_in,prob + call rcGetBit_mi +L243: + cmp symbol,#0x100; blo L240 +L245: + mov t1,#3; mov t0,#6 + cmp state,#10; csel t0,t1, t0,lo + cmp state,# 4; csel t0,state,t0,lo + sub state,state,t0 + b L298 // assumes symbol===w0 +L270: + add p_in,p,#IsRep<<1 + call rcGetBit_state0; bne L290 + ldr t0,rep2 + ldr t1,rep1 + str t0,rep3 + str t1,rep2 + str rep0,rep1 + mov t0,#0 + cmp state,#kNumLitStates + mov state,#3 + csel state,t0,state,lo + add prob, p,#LenCoder<<1 + b L350 +L290: + add p_in,p,#IsRepG0<<1 + call rcGetBit_state0; bne L300 +L293: + ldr t0,posState + add p_in,p,#IsRep0Long<<1 + add mi,t0,state,lsl #kNumPosBitsMax + call rcGetBit_mi0; bne L340 +L295: + mov t0,#9 + cmp state,#kNumLitStates + mov state,#11 + csel state,t0,state,lo +L297: + ldr t0x,outBuf + sub nowPosx,outPtr,t0x + cmp nowPos,rep0; blo lzmaDataError + sxtw t0x,rep0 + neg t0x,t0x + ldrb w0,[outPtr,t0x] +L298: + strb w0,[outPtr],#1 + b L519 +L300: + add p_in,p,#IsRepG1<<1 + call rcGetBit_state0; ldr distance,rep1; beq L330 +L310: + add p_in,p,#IsRepG2<<1 + call rcGetBit_state0; ldr distance,rep2; beq L325 +L320: + ldr t0,rep2 + ldr distance,rep3 + str t0,rep3 +L325: + ldr t0,rep1 + str t0,rep2 +L330: + str rep0,rep1 + mov rep0,distance +L340: + mov t0,#8 + cmp state,#kNumLitStates + mov state,#11 + csel state,t0,state,lo + add prob, p,#RepLenCoder<<1 +L350: + add p_in,prob,#LenChoice<<1 + call rcGetBit_0; bne L360 + ldr t0,posState + add probLen,prob,#LenLow<<1 + uxtw t0x,t0 + mov t1,#0 + add probLen,probLen,t0x,lsl #1+ kLenNumLowBits + mov hibit,#1<=Range) Code-=Range; + adc rep0,rep0,rep0 // rep0 = (rep0<<1) + (Code>=Range) +L430: + subs numDirectBits,numDirectBits,#1; bne L420 + add prob,p, #Align<<1 + lsl rep0,rep0,#kNumAlignBits + mov numDirectBits,#kNumAlignBits +L438: + mov i,#1 + mov mi,#1 +L440: + mov p_in,prob; call rcGetBit_mi + tst mo,#1; beq L445 + orr rep0,rep0,i +L445: + lsl i,i,#1 + subs numDirectBits,numDirectBits,#1; bne L440 + b L465 
+L450:
+L460:
+        ldr rep0,posSlot  // rep0 = value in the posSlot slot (posSlot is #defined as posState)
+L465:
+        adds rep0,rep0,#1  // ++rep0; only the disabled stream-version code below tests these flags
+#if 0 /*{ only for the stream version */
+        bne L470
+        mov t0,#kLzmaStreamWasFinishedId
+        str t0,m_len
+        b L530
+L470:
+#endif /*}*/
+        ldr r_len,m_len  // r_len = value in the m_len slot
+L500:  // copy up to r_len + kMatchMinLen bytes from rep0 bytes back in the output
+        ldr t0x,outBuf  // t0x = value in the outBuf slot
+        add r_len,r_len,#kMatchMinLen  // r_len += kMatchMinLen (2)
+        sub t0x,outPtr,t0x  // nowPos = outPtr - outBuf
+        cmp rep0,t0; bhi lzmaDataError  // unsigned 32-bit: rep0 > nowPos ==> error
+        sxtw t0x,rep0  // widen rep0 to 64 bits ...
+        neg t0x,t0x  // ... and negate it to get the (negative) byte offset
+L510:  // const t0x= -rep0;
+        ldrb w0,[outPtr,t0x]  // w0 = outPtr[-rep0]
+        strb w0,[outPtr],#1  // *outPtr++ = w0
+        cmp outPtr,outLim; bhs L530  // outPtr reached outLim: finish up
+        subs r_len,r_len,#1; bne L510  // repeat until r_len counts down to 0
+        // FIXME: prfm PLDL1KEEP,outPtr,#32  // fetch next cache line -- NOTE(review): A64 prfm expects a bracketed address such as [outPtr,#32]; confirm before enabling
+L519:
+        strb w0,prevB  // implicit &0xFF
+L520:  // bottom of while loop
+        cmp outPtr,outLim; blo L200  // outPtr still below outLim: back to the main loop at L200
+L530:
+        call rcNormalize  // 'call' is #defined as bl
+        mov w0,#0  // success
+lzmaExit:  // In: w0 = status to return; w0 is not modified below
+        ldr t1x,inBuf
+        sub t0x,inPtr,t1x  // t0x = inPtr - inBuf
+        ldr t1x,inSizeProcessed  // t1x = pointer held in the inSizeProcessed slot
+        str t0,[t1x]  // store the low 32 bits of the difference through it
+
+        ldr t1x,outBuf
+        sub t0x,outPtr,t1x  // t0x = outPtr - outBuf
+        ldr t1x,outSizeProcessed  // t1x = pointer held in the outSizeProcessed slot
+        str t0,[t1x]  // store the low 32 bits of the difference through it
+
+        add sp,sp,#(14+1)*4  // sp += 60; NOTE(review): 60 is not a multiple of 16 -- confirm against the frame built at entry
+// FIXME ldmia sp!,{r4,r5,r6,r7, r8,r9,r10,r11, pc} -- NOTE(review): no register restore or ret is emitted here, so control falls through into lzmaDataError, which branches back to lzmaExit
+
+lzmaDataError:
+        mov w0,#1  // failure
+        b lzmaExit
+
+
+rcNormalize:  // if (Range >> kNumTopBits) == 0, fall into rcLoad; otherwise just return. Clobbers t0.
+        lsr t0,Range,#kNumTopBits  // t0 = Range >> 24
+        cbnz t0,retNorm  // non-zero: no refill needed
+rcLoad:  // Range <<= 8 and shift one input byte into Code; returns through x30
+        cmp inPtr,inLim
+        lsl Range,Range,#8  // lsl does not touch the flags set by the cmp above
+        bhs lzmaDataError  // inPtr >= inLim: no input byte left to read
+rcInit2:  // shift one input byte into Code (also called directly from the L14 init loop)
+        ldrb t0,[inPtr],#1  // t0 = *inPtr++
+        orr Code,t0,Code, lsl #8  // Code = (Code << 8) | t0
+retNorm:
+        ret
+
+rcGetBit_state0:  // rcGetBit(0, state + p_in)
+        mov mi,state
+rcGetBit_mi0:  // rcGetBit(0, mi + p_in)
+        add p_in,p_in,mi, uxtw #1  // p_in += 2*mi (entries are halfwords: see ldrh/strh below)
+rcGetBit_0:  // rcGetBit(0, p_in)
+        mov mi,#0
+rcGetBit_mi:  // rcGetBit(mi, mi + p_in)
+        add p_in,p_in,mi, uxtw #1  // p_in += 2*mi
+rcGetBit:  // Out: CC set on mo.  Returns via t2x; overwrites t0, t1, t2x and x30.
+        lsr t0,Range,#kNumTopBits  // t0 = Range >> 24; zero means a refill is needed
+        mov t2x,x30  // save lr if need rcLoad
+        adr x30,rcGetBitCont; cbz t0,rcLoad  // conditional subroutine call
+rcGetBitCont:
+#define starp t0
+#define bound t1
+#define y0tmp t1
+        ldrh starp,[p_in]  // starp = 16-bit value at p_in
+        lsr y0tmp,Range,#kNumBitModelTotalBits
+        mul bound,starp,y0tmp  // bound = starp * (Range >> kNumBitModelTotalBits)
+        cmp Code,bound; bhs rcGB1  // carry from this cmp is consumed by the adcs on either path
+rcGB0:  // Code < bound
+        mov Range,bound  // Range = bound; must come first because y0tmp and bound are both t1
+        mov y0tmp,#kBitModelTotal  // from here on t1 no longer holds bound
+        sub y0tmp,y0tmp,starp  // y0tmp = kBitModelTotal - starp
+        adcs mo,mi,mi  // mo = (mi<<1) | (Code >= bound); set CC
+        add starp,starp,y0tmp, lsr #kNumMoveBits  // starp += (kBitModelTotal - starp) >> kNumMoveBits
+        strh starp,[p_in]  // write the updated value back
+        ret t2x  // return to the lr saved at rcGetBit
+rcGB1:  // Code >= bound
+        sub Code,Code,bound  // Code -= bound
+        sub Range,Range,bound  // Range -= bound
+        sub starp,starp,starp, lsr #kNumMoveBits  // starp -= starp >> kNumMoveBits
+        adcs mo,mi,mi  // mo = (mi<<1) | (Code >= bound); set CC
+        strh starp,[p_in]  // write the updated value back
+        ret t2x  // return to the lr saved at rcGetBit
+#undef y0tmp
+#undef bound
+#undef starp
+#undef t1x
+#undef t1
+#undef t0x
+#undef t0
+
+// vi:ts=8:et