forgotten file; 3 FIXME !
added src/stub/src/arch/arm/v8a/lzma_d-arm.S
This commit is contained in:
parent
4a1100b6a4
commit
3774704326
525
src/stub/src/arch/arm/v8a/lzma_d-arm.S
Normal file
525
src/stub/src/arch/arm/v8a/lzma_d-arm.S
Normal file
@ -0,0 +1,525 @@
|
||||
// In LZMA SDK 4.63 file lzma.txt (2008-12-30):
|
||||
// LZMA SDK is written and placed in the public domain by Igor Pavlov.
|
||||
// The creative expression of this hand compilation into assembly language,
|
||||
// including (but not limited to) code organization and register assignment,
|
||||
// remains copyright by John F. Reiser and licensed under GNU Lesser General
|
||||
// Public License (GNU LGPL).
|
||||
|
||||
// Hand compiled Copyright (c) 2006-2015 John F. Reiser (2007-06-18)
|
||||
// from modified LzmaDecode.c.
|
||||
// LZMA SDK 4.40 Copyright (c) 1999-2015 Igor Pavlov (2006-05-01)
|
||||
//
|
||||
// This file is licensed under either of these two licenses:
|
||||
// 1) GNU Lesser General Public License (GNU LGPL)
|
||||
// 2) Common Public License (CPL)
|
||||
// See files LGPL.txt and CPL.html for the text of the licenses.
|
||||
|
||||
#include "macros.S"
|
||||
|
||||
// Value stored into m_len by the end-marker path, which is compiled out
// (#if 0, "only for the stream version") in the distance decoder.
kLzmaStreamWasFinishedId= (-1)
|
||||
|
||||
// Range coder: rcNormalize and rcGetBit fetch another input byte whenever
// (Range >> kNumTopBits) is zero.
kNumTopBits= 24
|
||||
kTopValue= 1<<kNumTopBits
|
||||
|
||||
// Probabilities are 16-bit values (accessed with ldrh/strh in rcGetBit).
// They start at kBitModelTotal>>1 and are adapted by a shift of kNumMoveBits.
kNumBitModelTotalBits= 11
|
||||
kBitModelTotal= (1 << kNumBitModelTotalBits)
|
||||
kNumMoveBits= 5
|
||||
|
||||
kNumPosBitsMax= 4
|
||||
kNumPosStatesMax= (1 << kNumPosBitsMax)
|
||||
|
||||
// Sizes of the three bit trees (low, mid, high) of a length coder.
kLenNumLowBits= 3
|
||||
kLenNumLowSymbols= (1 << kLenNumLowBits)
|
||||
kLenNumMidBits= 3
|
||||
kLenNumMidSymbols= (1 << kLenNumMidBits)
|
||||
kLenNumHighBits= 8
|
||||
kLenNumHighSymbols= (1 << kLenNumHighBits)
|
||||
|
||||
// Layout of one length coder, in units of one probability, relative to its
// base (LenCoder or RepLenCoder).  kNumLenProbs evaluates to 514.
LenChoice= 0
|
||||
LenChoice2= (LenChoice + 1)
|
||||
LenLow= (LenChoice2 + 1)
|
||||
LenMid= (LenLow + (kNumPosStatesMax << kLenNumLowBits))
|
||||
LenHigh= (LenMid + (kNumPosStatesMax << kLenNumMidBits))
|
||||
kNumLenProbs= (LenHigh + kLenNumHighSymbols)
|
||||
|
||||
// state values below kNumLitStates take the "previous symbol was a literal"
// branch wherever the decoder compares state against kNumLitStates.
kNumStates= 12
|
||||
kNumLitStates= 7
|
||||
|
||||
// posSlot thresholds used by the distance decoder (L405 / L410).
kStartPosModelIndex= 4
|
||||
kEndPosModelIndex= 14
|
||||
kNumFullDistances= (1 << (kEndPosModelIndex >> 1))
|
||||
|
||||
kNumPosSlotBits= 6
|
||||
kNumLenToPosStates= 4
|
||||
|
||||
kNumAlignBits= 4
|
||||
kAlignTableSize= (1 << kNumAlignBits)
|
||||
|
||||
// Added to the decoded length before the copy loop at L500.
kMatchMinLen= 2
|
||||
|
||||
// Offsets of each model inside the probability table, in units of one
// probability; the code shifts them left by 1 to form byte offsets from p.
IsMatch= 0
|
||||
IsRep= (IsMatch + (kNumStates << kNumPosBitsMax))
|
||||
IsRepG0= (IsRep + kNumStates)
|
||||
IsRepG1= (IsRepG0 + kNumStates)
|
||||
IsRepG2= (IsRepG1 + kNumStates)
|
||||
IsRep0Long= (IsRepG2 + kNumStates)
|
||||
PosSlot= (IsRep0Long + (kNumStates << kNumPosBitsMax))
|
||||
SpecPos= (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
|
||||
Align= (SpecPos + kNumFullDistances - kEndPosModelIndex)
|
||||
|
||||
LenCoder= (Align + kAlignTableSize)
|
||||
|
||||
RepLenCoder= (LenCoder + kNumLenProbs)
|
||||
|
||||
Literal= (RepLenCoder + kNumLenProbs)
|
||||
|
||||
// The table holds LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lp + lc)) probabilities;
// LzmaDecode initializes exactly that many (rounded up by its store loop).
LZMA_BASE_SIZE= Literal /* 1846 */
|
||||
LZMA_LIT_SIZE= 768
|
||||
|
||||
// Register and name aliases used throughout the decoder.
// Several names share one register (nowPos, distance, numDirectBits and hibit
// are all "bit" == w4; matchB, r_len, probLen and probLit are all w5/x5), so
// each alias is only meaningful inside the phase that sets it.
#define call bl /* subroutine call */
|
||||
|
||||
// symbol, mo and mi are one register: rcGetBit takes mi and returns mo in place.
#define symbol mi
|
||||
#define mo mi
|
||||
|
||||
#define mi w0
|
||||
#define p_in x1
|
||||
#define t1 w2
|
||||
#define t1x x2
|
||||
#define t0 w3
|
||||
#define t0x x3
|
||||
|
||||
#define bit w4
|
||||
#define bitx x4
|
||||
#define i w5
|
||||
#define ix x5
|
||||
#define state w6
|
||||
#define Range w7
|
||||
|
||||
#define inPtr x8
|
||||
#define outPtr x9
|
||||
#define prob x10
|
||||
#define p x11
|
||||
|
||||
#define Code w12
|
||||
#define inLim x13
|
||||
#define outLim x14
|
||||
|
||||
#define rep0 w15
|
||||
|
||||
// t2x holds the caller's return address inside rcGetBit while x30 is reused.
#define t2x x17
|
||||
|
||||
#define nowPos bit /* temporary only */
|
||||
#define nowPosx bitx /* temporary only */
|
||||
#define distance bit
|
||||
#define numDirectBits bit
|
||||
#define hibit bit
|
||||
#define matchB i
|
||||
#define probLen ix
|
||||
#define probLit ix
|
||||
// posSlot shares the posState slot; posState is #defined later (inside
// LzmaDecode) as a stack location, and the preprocessor resolves it at use.
#define posSlot posState
|
||||
#define r_posSlot t0
|
||||
|
||||
#define r_len i
|
||||
|
||||
// vs is the first argument; vs_lc/vs_lp/vs_ps are the byte offsets read from
// it when PARAMETER_STYLE is 3.
#define vs x0
|
||||
vs_ps= 2
|
||||
|
||||
vs_lp= 1
|
||||
|
||||
vs_lc= 0
|
||||
|
||||
// PARAMETER_STYLE selects how LzmaDecode obtains pb/lp/lc; default is 1.
#if !defined(PARAMETER_STYLE) /*{*/
|
||||
#define PARAMETER_STYLE 1
|
||||
// Possible choices:
|
||||
// 1 /* 0 bytes; 1-byte encoding of pb,lp,lc [default] */
|
||||
// 2 /* -24 bytes; 2-byte encoding requires no division */
|
||||
// 3 /* -32 bytes; separate bytes lc,lp,pb,xx at -4+ probs */
|
||||
#endif /*}*/
|
||||
|
||||
/* LzmaDecode(x0=vs, x1=inStream, w2=inSize, x3= &inSizeProcessed,
 *            x4= outStream, w5= outSize, x6= &outSizeProcessed)
 *
 * Entry and setup half of the decoder:
 *   - records the input and output limits,
 *   - obtains pb/lp/lc (encoding selected by PARAMETER_STYLE),
 *   - fills the probability table with the 0.5 starting value,
 *   - primes the range coder from the next 5 input bytes,
 * then falls through into the main loop at L200.
 *
 * Result in w0: 0 = success (set at L530), 1 = data error (lzmaDataError).
 *
 * NOTE(review): the ARM32 "stmdb" that built the local frame is still a FIXME.
 * The [sp, #n*4] slots #defined below are therefore neither allocated nor
 * filled here (rep1..rep3, posStateMask, litPosMask, lc, inBuf, outBuf), and
 * their 4-byte spacing does not fit the 64-bit pointers that the main loop
 * loads from inBuf/outBuf.  This must be ported before the code can run.
 */
LzmaDecode: .globl LzmaDecode
        // save &inSizeProcessed and caller registers
        PUSH5(x3,x4,x5,x6, x30)
        mov inPtr,x1
        add inLim,x1,w2, uxtw           // inLim  = inStream  + inSize
        mov outPtr,x4
        add outLim,x4,w5, uxtw          // outLim = outStream + outSize

#if 1==PARAMETER_STYLE /*{ [0]: pb*45 + lp*9 + lc */
        mov p,vs
        ldrb w6,[inPtr],#1

        mov w0,#45; udiv w4,w6,w0       // w4 = quo(w6, 45) = pb
        mul w0,w0,w4; sub w6,w6,w0      // w6 = rem(w6, 45)

        mov w0,#9; udiv w5,w6,w0        // w5 = quo(w6, 9) = lp
        mul w0,w0,w5; sub w6,w6,w0      // w6 = rem(w6, 9) = lc
#endif /*}*/

#if 2==PARAMETER_STYLE /*{ [0]: ((lc + lp)<<3) | pb; [1]: (lp<<4) | lc */
        mov p,vs
        ldrb w4,[inPtr],#1; and w4,w4,#7        // pb
        ldrb w6,[inPtr],#1
        lsr w5,w6,#4                    // lp  (A64 "mov" takes no shifted operand)
        and w6,w6,#0xf                  // lc
#endif /*}*/

#if 3==PARAMETER_STYLE /*{ lc,lp,pb,xx in separate bytes before probs[] */
        add p,vs,#4
        ldrb w6,[vs, #vs_lc]
        ldrb w5,[vs, #vs_lp]
        ldrb w4,[vs, #vs_ps]
#endif /*}*/
#undef vs

        // lp + lc is kept in w16: w14 is the low half of outLim (x14) and
        // writing it would destroy the output limit computed above.
        add w16,w5,w6                   // lp + lc
        mov Range,#~0
        lsl w5,Range,w5; mvn w5,w5      // ~(~0<<lp) == litPosMask
        lsl w4,Range,w4; mvn w4,w4      // ~(~0<<ps) == posStateMask
        mov rep0,#1                     // rep0 is a register in this port: initialize it directly
        mov w3,#1                       // rep3, staged for the frame store below
        mov w2,#1                       // rep2
        mov w1,#1                       // rep1
// FIXME stmdb sp!,{r0,r1,r2,r3, r4,r5,r6,r7, r8,r9,r10,r11, r12,r14}
//#define rep0 [sp, #0*4]
#define rep1 [sp, #1*4]
#define rep2 [sp, #2*4]
#define rep3 [sp, #3*4]
#define posStateMask [sp, #4*4]
#define litPosMask [sp, #5*4]
#define lc [sp, #6*4]
#define prevB [sp, #7*4]
#define inBuf [sp, #8*4]
#define outBuf [sp, #9*4]
//#define outLim [sp, #10*4]
#define posState [sp, #11*4]
//#define inLim [sp, #12*4]
#define m_len [sp, #13*4]

#define inSizeProcessed [sp, #(14 )*4]
#define outSizeProcessed [sp, #(14+10+2)*4]

        mov state,#0
        str state,prevB

        // Initialize LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lp + lc)) probabilities.
        mov w2,#LZMA_LIT_SIZE
        mov w0,#LZMA_BASE_SIZE
        lsl w2,w2,w16                   // LZMA_LIT_SIZE << (lp + lc)
        add w2,w2,w0
        mov x0,#(kBitModelTotal>>1)     // 0.5 starting probability
        mov x1,p
        orr x0,x0,x0,lsl #16
        orr x0,x0,x0,lsl #32            // four 16-bit copies in x0
        // The count is even but not a multiple of 4 (LZMA_BASE_SIZE is 1846),
        // so peel one pair first; otherwise the last 8-byte store would write
        // 4 bytes beyond the end of the table.
        tbz w2,#1,L10
        str w0,[x1],#2*2                // 2 probabilities
        sub w2,w2,#2
L10:
        str x0,[x1],#4*2                // 4 at a time
        subs w2,w2,#4; bgt L10

        // Prime the range coder: shift 5 input bytes into Code.
        add x0,inPtr,#5                 // sentinel
L14:
        call rcInit2; cmp x0,inPtr; bne L14
|
||||
|
||||
// Top of the decode loop.  Computes posState = nowPos & posStateMask, then
// decodes one IsMatch bit indexed by (state << kNumPosBitsMax) + posState:
// nonzero goes to the match path (L270), zero decodes a literal below.
L200: // main loop
|
||||
ldr t0x,outBuf
|
||||
ldr t1,posStateMask
|
||||
sub nowPosx,outPtr,t0x
|
||||
and mi,nowPos,t1
|
||||
str mi,posState
|
||||
add mi,mi,state, lsl #kNumPosBitsMax
|
||||
add p_in,p,#IsMatch<<1
|
||||
call rcGetBit_mi0; bne L270
|
||||
|
||||
// Literal: prob = p + 2*(Literal + 768*(((nowPos & litPosMask) << lc)
//                                      + (prevB >> (8 - lc)))).
// nowPos (w4) is still live here because rcGetBit only writes t0, t1 and mo.
ldr t0,litPosMask
|
||||
ldr t1,lc
|
||||
and t0,t0,nowPos
|
||||
ldrb mi,prevB
|
||||
lsl t0,t0,t1 // (nowPos & litPosMask)<<lc
|
||||
neg t1,t1; add t1,t1,#8 // rsb t1,t1,#8
|
||||
lsr t1,mi,t1 // prevB >> (8- lc)
|
||||
add t0,t0,t1
|
||||
add prob, p,#Literal<<1
|
||||
add t0,t0,t0,lsl #1 // *3
|
||||
uxtw t0x,t0
|
||||
mov symbol,#1
|
||||
add prob,prob,t0x,lsl #1+ 8 // *768 *2
|
||||
|
||||
// After a literal (state < kNumLitStates) decode a plain 8-bit tree at L240;
// otherwise decode against the byte at outPtr[-rep0] until the first mismatch.
cmp state,#kNumLitStates; blo L240
|
||||
L205:
|
||||
sxtw t0x,rep0
|
||||
neg t0x,t0x
|
||||
ldrb matchB,[outPtr,t0x]
|
||||
// Matched-literal loop: bit = next bit of matchB (as 0x100 or 0); the
// probability is prob[0x100 + bit + symbol].  Leave for L243 as soon as the
// decoded bit differs from the match bit.
L210: // symbol === mi === mo
|
||||
lsl matchB,matchB,#1
|
||||
add p_in,prob,#0x100<<1
|
||||
and bit,matchB,#0x100
|
||||
uxtw t0x,bit
|
||||
add p_in,p_in,t0x,lsl #1
|
||||
call rcGetBit_mi
|
||||
and t0,symbol,#1
|
||||
cmp t0,bit,lsr #8; bne L243 // break
|
||||
cmp symbol,#0x100; blo L210
|
||||
b L245
|
||||
// Plain literal loop: prob[symbol] until symbol reaches 0x100.
L240: // symbol === mi === mo
|
||||
mov p_in,prob
|
||||
call rcGetBit_mi
|
||||
L243:
|
||||
cmp symbol,#0x100; blo L240
|
||||
// State update after a literal: state<4 -> 0, state<10 -> state-3,
// otherwise state-6.
L245:
|
||||
mov t1,#3; mov t0,#6
|
||||
cmp state,#10; csel t0,t1, t0,lo
|
||||
cmp state,# 4; csel t0,state,t0,lo
|
||||
sub state,state,t0
|
||||
b L298 // assumes symbol===w0
|
||||
// Match path.  IsRep[state] == 0: new match -- shift the distance history
// (rep3 <- rep2 <- rep1 <- rep0), set state to 0 (after literal) or 3, and
// decode the length with LenCoder.  IsRep[state] != 0: rep match, see L290.
L270:
|
||||
add p_in,p,#IsRep<<1
|
||||
call rcGetBit_state0; bne L290
|
||||
ldr t0,rep2
|
||||
ldr t1,rep1
|
||||
str t0,rep3
|
||||
str t1,rep2
|
||||
str rep0,rep1
|
||||
mov t0,#0
|
||||
cmp state,#kNumLitStates
|
||||
mov state,#3
|
||||
csel state,t0,state,lo
|
||||
add prob, p,#LenCoder<<1
|
||||
b L350
|
||||
// Rep match.  IsRepG0[state] == 0 keeps rep0; otherwise L300 picks rep1..rep3.
L290:
|
||||
add p_in,p,#IsRepG0<<1
|
||||
call rcGetBit_state0; bne L300
|
||||
// IsRep0Long[(state << kNumPosBitsMax) + posState]: nonzero means a full
// length rep0 match (L340); zero means a one-byte copy.
L293:
|
||||
ldr t0,posState
|
||||
add p_in,p,#IsRep0Long<<1
|
||||
add mi,t0,state,lsl #kNumPosBitsMax
|
||||
call rcGetBit_mi0; bne L340
|
||||
// One-byte rep: state becomes 9 (after literal) or 11.
L295:
|
||||
mov t0,#9
|
||||
cmp state,#kNumLitStates
|
||||
mov state,#11
|
||||
csel state,t0,state,lo
|
||||
// Copy the single byte at outPtr[-rep0]; data error if fewer than rep0 bytes
// have been produced so far.
L297:
|
||||
ldr t0x,outBuf
|
||||
sub nowPosx,outPtr,t0x
|
||||
cmp nowPos,rep0; blo lzmaDataError
|
||||
sxtw t0x,rep0
|
||||
neg t0x,t0x
|
||||
ldrb w0,[outPtr,t0x]
|
||||
// Shared store of one output byte in w0 (also the target of the literal path).
L298:
|
||||
strb w0,[outPtr],#1
|
||||
b L519
|
||||
// Select rep1, rep2 or rep3 as the new distance and move the skipped entries
// down.  The ldr between the call and the beq does not change the flags, so
// each beq still tests the bit just returned by rcGetBit.
L300:
|
||||
add p_in,p,#IsRepG1<<1
|
||||
call rcGetBit_state0; ldr distance,rep1; beq L330
|
||||
L310:
|
||||
add p_in,p,#IsRepG2<<1
|
||||
call rcGetBit_state0; ldr distance,rep2; beq L325
|
||||
L320:
|
||||
ldr t0,rep2
|
||||
ldr distance,rep3
|
||||
str t0,rep3
|
||||
L325:
|
||||
ldr t0,rep1
|
||||
str t0,rep2
|
||||
L330:
|
||||
str rep0,rep1
|
||||
mov rep0,distance
|
||||
// Full-length rep match: state becomes 8 (after literal) or 11, and the
// length is decoded with RepLenCoder by falling into L350.
L340:
|
||||
mov t0,#8
|
||||
cmp state,#kNumLitStates
|
||||
mov state,#11
|
||||
csel state,t0,state,lo
|
||||
add prob, p,#RepLenCoder<<1
|
||||
// Length decode.  In: prob = base of LenCoder or RepLenCoder.
// LenChoice == 0       -> low tree  (per posState), base length 0
// LenChoice2 == 0      -> mid tree  (per posState), base length kLenNumLowSymbols
// otherwise            -> high tree, base length kLenNumLowSymbols + kLenNumMidSymbols
// hibit is the tree size (1 << number of bits); the base length is parked in m_len.
L350:
|
||||
add p_in,prob,#LenChoice<<1
|
||||
call rcGetBit_0; bne L360
|
||||
ldr t0,posState
|
||||
add probLen,prob,#LenLow<<1
|
||||
uxtw t0x,t0
|
||||
mov t1,#0
|
||||
add probLen,probLen,t0x,lsl #1+ kLenNumLowBits
|
||||
mov hibit,#1<<kLenNumLowBits
|
||||
b L390
|
||||
L360:
|
||||
add p_in,prob,#LenChoice2<<1
|
||||
call rcGetBit_0; bne L370
|
||||
ldr t0,posState
|
||||
add probLen,prob,#LenMid<<1
|
||||
uxtw t0x,t0
|
||||
mov t1,#kLenNumLowSymbols
|
||||
add probLen,probLen,t0x,lsl #1+ kLenNumMidBits
|
||||
mov hibit,#1<<kLenNumMidBits
|
||||
b L390
|
||||
L370:
|
||||
add probLen,prob,#LenHigh<<1
|
||||
mov t1,#kLenNumLowSymbols + kLenNumMidSymbols
|
||||
mov hibit,#1<<kLenNumHighBits
|
||||
// t1 must be saved before the loop: rcGetBit overwrites t0 and t1.
L390:
|
||||
str t1,m_len
|
||||
mov mi,#1
|
||||
// Bit-tree decode: starting from mi = 1, keep decoding prob[mi] until
// mo >= hibit; t0 = mo - hibit is then the decoded symbol.
L395: // RangeDecoderBitTreeDecode
|
||||
mov p_in,probLen
|
||||
call rcGetBit_mi; subs t0,mo,hibit; blo L395
|
||||
ldr r_len,m_len
|
||||
add r_len,r_len,t0
|
||||
str r_len,m_len
|
||||
// state is 0 or 3 after a new match (L270) and 8 or 11 after a rep match
// (L340); only a new match (state < 4) decodes a distance here.
cmp state,#4; bhs L500
|
||||
/*L400:*/
|
||||
add state,state,#kNumLitStates
|
||||
// Pick the PosSlot tree: index = min(len, kNumLenToPosStates - 1).
// r_len is clobbered here; the real length was saved in m_len.
mov t0,#kNumLenToPosStates -1
|
||||
cmp r_len,#kNumLenToPosStates
|
||||
csel r_len,t0,r_len,hs
|
||||
uxtw t0x,r_len
|
||||
add probLit,p,t0x,lsl #1+ kNumPosSlotBits
|
||||
add probLit,probLit,#PosSlot<<1
|
||||
mov mi,#1
|
||||
mov hibit,#1<<kNumPosSlotBits
|
||||
L403: // RangeDecoderBitTreeDecode
|
||||
mov p_in,probLit
|
||||
call rcGetBit_mi; subs r_posSlot,mo,hibit; blo L403
|
||||
str r_posSlot,posSlot
|
||||
|
||||
// Slots below kStartPosModelIndex are the distance itself (L460).
cmp r_posSlot,#kStartPosModelIndex; blo L460
|
||||
// Otherwise rep0 starts as 2 | (posSlot & 1) and
// numDirectBits = (posSlot >> 1) - 1 more bits follow.
L405:
|
||||
lsr numDirectBits,r_posSlot,#1
|
||||
sub numDirectBits,numDirectBits,#1
|
||||
and rep0,r_posSlot,#1
|
||||
orr rep0,rep0,#2
|
||||
cmp r_posSlot,#kEndPosModelIndex; bhs L410
|
||||
// posSlot < kEndPosModelIndex: all remaining bits come from the SpecPos
// models at p + 2*(SpecPos - 1 + rep0 - posSlot), decoded at L438.
L407:
|
||||
lsl rep0,rep0,numDirectBits
|
||||
add prob,p, #(SpecPos -1)<<1
|
||||
sub t0,rep0,r_posSlot // r_posSlot dies
|
||||
uxtw t0x,t0
|
||||
add prob,prob,t0x,lsl #1
|
||||
b L438
|
||||
// posSlot >= kEndPosModelIndex: the upper numDirectBits - kNumAlignBits bits
// are read without a model by halving Range; the low kNumAlignBits bits use
// the Align models.  rcNormalize overwrites t0, which is free by now.
L410:
|
||||
sub numDirectBits,numDirectBits,#kNumAlignBits
|
||||
L420:
|
||||
call rcNormalize
|
||||
lsr Range,Range,#1
|
||||
subs t0,Code,Range
|
||||
csel Code,t0,Code,hs // if (Code>=Range) Code-=Range;
|
||||
adc rep0,rep0,rep0 // rep0 = (rep0<<1) + (Code>=Range)
|
||||
L430:
|
||||
subs numDirectBits,numDirectBits,#1; bne L420
|
||||
add prob,p, #Align<<1
|
||||
lsl rep0,rep0,#kNumAlignBits
|
||||
mov numDirectBits,#kNumAlignBits
|
||||
// Bit tree whose result bits are OR-ed into rep0 starting from bit 0:
// i is the value of the bit position being decoded and doubles each round.
L438:
|
||||
mov i,#1
|
||||
mov mi,#1
|
||||
L440:
|
||||
mov p_in,prob; call rcGetBit_mi
|
||||
tst mo,#1; beq L445
|
||||
orr rep0,rep0,i
|
||||
L445:
|
||||
lsl i,i,#1
|
||||
subs numDirectBits,numDirectBits,#1; bne L440
|
||||
b L465
|
||||
L450:
|
||||
L460:
|
||||
ldr rep0,posSlot
|
||||
// rep0 += 1.  The flags from "adds" are only consumed by the end-marker test
// that is compiled out below.
L465:
|
||||
adds rep0,rep0,#1
|
||||
#if 0 /*{ only for the stream version */
|
||||
bne L470
|
||||
mov t0,#kLzmaStreamWasFinishedId
|
||||
str t0,m_len
|
||||
b L530
|
||||
L470:
|
||||
#endif /*}*/
|
||||
// Reload the length saved at the end of the length decode.
ldr r_len,m_len
|
||||
// Match copy.  In: r_len = decoded length (kMatchMinLen is added here),
// rep0 = distance back from outPtr.  Data error if rep0 exceeds the number of
// bytes produced so far (compared as 32-bit values).
L500:
|
||||
ldr t0x,outBuf
|
||||
add r_len,r_len,#kMatchMinLen
|
||||
sub t0x,outPtr,t0x // nowPos
|
||||
cmp rep0,t0; bhi lzmaDataError
|
||||
sxtw t0x,rep0
|
||||
neg t0x,t0x
|
||||
// Byte-at-a-time copy so that overlapping source and destination replicate
// data; stops early (L530) once outPtr reaches outLim.
L510: // const t0x= -rep0;
|
||||
ldrb w0,[outPtr,t0x]
|
||||
strb w0,[outPtr],#1
|
||||
cmp outPtr,outLim; bhs L530
|
||||
subs r_len,r_len,#1; bne L510
|
||||
// FIXME: prfm PLDL1KEEP,outPtr,#32 // fetch next cache line
|
||||
// Remember the last byte written; it selects the next literal coder.
L519:
|
||||
strb w0,prevB // implicit &0xFF
|
||||
L520: // bottom of while loop
|
||||
cmp outPtr,outLim; blo L200
|
||||
// Output is full: normalize the range coder once more, then report success
// by falling into lzmaExit with w0 = 0.
L530:
|
||||
call rcNormalize
|
||||
mov w0,#0 // success
|
||||
// Common exit.  In: w0 = result (0 success, 1 failure).
// Stores (inPtr - inBuf) through the saved &inSizeProcessed pointer and
// (outPtr - outBuf) through the saved &outSizeProcessed pointer, as 32-bit
// values, then discards (14+1)*4 = 60 bytes of frame.
// NOTE(review): the register restore and return are still a FIXME, so there
// is no return instruction here; execution continues into lzmaDataError
// below, which sets w0 = 1 and branches back to lzmaExit.  The epilogue must
// be ported (matching whatever PUSH5 pushed at entry) before this can run.
lzmaExit:
|
||||
ldr t1x,inBuf
|
||||
sub t0x,inPtr,t1x
|
||||
ldr t1x,inSizeProcessed
|
||||
str t0,[t1x]
|
||||
|
||||
ldr t1x,outBuf
|
||||
sub t0x,outPtr,t1x
|
||||
ldr t1x,outSizeProcessed
|
||||
str t0,[t1x]
|
||||
|
||||
// NOTE(review): 60 is not a multiple of 16 -- confirm the final frame size
// keeps sp 16-byte aligned as AArch64 requires for sp-based accesses.
add sp,sp,#(14+1)*4
|
||||
// FIXME ldmia sp!,{r4,r5,r6,r7, r8,r9,r10,r11, pc}
|
||||
|
||||
// Error exit: reached by branch from the bounds checks in the decoder and
// from rcLoad when the input is exhausted.
lzmaDataError:
|
||||
mov w0,#1 // failure
|
||||
b lzmaExit
|
||||
|
||||
|
||||
// rcNormalize: if (Range >> kNumTopBits) == 0, shift Range left by 8 and
// shift one more input byte into Code; otherwise return unchanged.
// Called with bl; returns with ret (x30).  Writes t0, Range, Code, inPtr, flags.
rcNormalize:
|
||||
lsr t0,Range,#kNumTopBits
|
||||
cbnz t0,retNorm
|
||||
// rcLoad: entry used by rcGetBit once it has already found that Range needs
// refilling.  Branches to lzmaDataError (it does not return) when
// inPtr >= inLim.  The lsl sits between cmp and bhs; it does not alter flags.
rcLoad:
|
||||
cmp inPtr,inLim
|
||||
lsl Range,Range,#8
|
||||
bhs lzmaDataError
|
||||
// rcInit2: entry used by the start-up loop in LzmaDecode to shift one byte
// into Code without touching Range and without the inLim check.
rcInit2:
|
||||
ldrb t0,[inPtr],#1
|
||||
orr Code,t0,Code, lsl #8
|
||||
retNorm:
|
||||
ret
|
||||
|
||||
// rcGetBit family: decode one bit with the 16-bit probability at [p_in] and
// adapt that probability.  The entry points fall through one another:
//   rcGetBit_state0   mi = state, then as rcGetBit_mi0
//   rcGetBit_mi0      p_in += 2*mi, then as rcGetBit_0
//   rcGetBit_0        mi = 0, then as rcGetBit_mi
//   rcGetBit_mi       p_in += 2*mi, then rcGetBit
// Out: mo (same register as mi) = (mi << 1) | bit, flags set from mo by adcs,
//      so after an entry with mi = 0 the caller can test the bit with beq/bne.
// Writes: t0, t1, t2x, x30, p_in, Range, Code, and inPtr if a byte is loaded.
rcGetBit_state0: // rcGetBit(0, state + p_in)
|
||||
mov mi,state
|
||||
rcGetBit_mi0: // rcGetBit(0, mi + p_in)
|
||||
add p_in,p_in,mi, uxtw #1
|
||||
rcGetBit_0: // rcGetBit(0, p_in)
|
||||
mov mi,#0
|
||||
rcGetBit_mi: // rcGetBit(mi, mi + p_in)
|
||||
add p_in,p_in,mi, uxtw #1
|
||||
rcGetBit: // Out: CC set on mo
|
||||
lsr t0,Range,#kNumTopBits
|
||||
// The return address moves to t2x so that x30 can point at rcGetBitCont:
// when Range needs refilling, rcLoad runs and its ret lands on rcGetBitCont.
mov t2x,x30 // save lr if need rcLoad
|
||||
adr x30,rcGetBitCont; cbz t0,rcLoad // conditional subroutine call
|
||||
rcGetBitCont:
|
||||
#define starp t0
|
||||
#define bound t1
|
||||
#define y0tmp t1
|
||||
// bound = (Range >> kNumBitModelTotalBits) * probability
ldrh starp,[p_in]
|
||||
lsr y0tmp,Range,#kNumBitModelTotalBits
|
||||
mul bound,starp,y0tmp
|
||||
cmp Code,bound; bhs rcGB1
|
||||
// Bit 0: Range = bound; probability += (kBitModelTotal - probability) >> kNumMoveBits.
// The carry from the cmp above (clear here) is still intact when adcs runs,
// because mov and the non-flag-setting sub leave the flags alone.
rcGB0: // Code < bound
|
||||
mov y0tmp,#kBitModelTotal
|
||||
mov Range,bound
|
||||
sub y0tmp,y0tmp,starp
|
||||
adcs mo,mi,mi // mo = (mi<<1) | (Code >= bound); set CC
|
||||
add starp,starp,y0tmp, lsr #kNumMoveBits
|
||||
strh starp,[p_in]
|
||||
ret t2x
|
||||
// Bit 1: Code -= bound; Range -= bound; probability -= probability >> kNumMoveBits.
// Carry is set here (Code >= bound) and again survives until adcs.
rcGB1: // Code >= bound
|
||||
sub Code,Code,bound
|
||||
sub Range,Range,bound
|
||||
sub starp,starp,starp, lsr #kNumMoveBits
|
||||
adcs mo,mi,mi // mo = (mi<<1) | (Code >= bound); set CC
|
||||
strh starp,[p_in]
|
||||
ret t2x
|
||||
// Drop the local aliases and the t0/t1 temporaries.
#undef y0tmp
|
||||
#undef bound
|
||||
#undef starp
|
||||
#undef t1x
|
||||
#undef t1
|
||||
#undef t0x
|
||||
#undef t0
|
||||
|
||||
// vi:ts=8:et
|
||||
Loading…
Reference in New Issue
Block a user