diff --git a/src/stub/src/amd64-linux.elf-fold.S b/src/stub/src/amd64-linux.elf-fold.S index c4c4f1ee..e9e50258 100644 --- a/src/stub/src/amd64-linux.elf-fold.S +++ b/src/stub/src/amd64-linux.elf-fold.S @@ -113,7 +113,7 @@ L90: pop %arg1 # ADRU pop %rcx # JMPU pop %rcx # breadcrumb size in pages - shl $12,%ecx + shl $12,%ecx addl %ecx,%arg1l # addr += crumb XXX 4GB subl %ecx,%arg2l # len -= crumb XXX 4GB push %rax # &entry diff --git a/src/stub/src/arch/arm/v4a/lzma_d-arm.S b/src/stub/src/arch/arm/v4a/lzma_d-arm.S index e06ecffd..d70768e1 100644 --- a/src/stub/src/arch/arm/v4a/lzma_d-arm.S +++ b/src/stub/src/arch/arm/v4a/lzma_d-arm.S @@ -1,5 +1,5 @@ // In LZMA SDK 4.63 file lzma.txt (2008-12-30): -// LZMA SDK is written and placed in the public domain by Igor Pavlov. +// LZMA SDK is written and placed in the public domain by Igor Pavlov. // The creative expression of this hand compilation into assembly language, // including (but not limited to) code organization and register assignment, // remains copyright by John F. Reiser and licensed under GNU Lesser General @@ -82,38 +82,38 @@ Literal= (RepLenCoder + kNumLenProbs) #define call bl /* subroutine call */ -#define symbol mi -#define mo mi +#define symbol mi +#define mo mi -#define mi r0 -#define p_in r1 -#define t1 r2 -#define t0 r3 +#define mi r0 +#define p_in r1 +#define t1 r2 +#define t0 r3 -#define bit r4 -#define i r5 -#define state r6 -#define Range r7 +#define bit r4 +#define i r5 +#define state r6 +#define Range r7 -#define inPtr r8 -#define outPtr r9 -#define prob r10 -#define p r11 +#define inPtr r8 +#define outPtr r9 +#define prob r10 +#define p r11 -#define Code r12 +#define Code r12 -#define nowPos bit /* temporary only */ -#define distance bit -#define numDirectBits bit -#define hibit bit -#define matchB i -#define probLen i -#define probLit i -#define posSlot posState -#define r_posSlot t0 +#define nowPos bit /* temporary only */ +#define distance bit +#define numDirectBits bit +#define hibit bit +#define matchB i +#define probLen i +#define probLit i +#define posSlot posState +#define r_posSlot t0 -#define r_len i -#define r_rep0 p_in +#define r_len i +#define r_rep0 p_in #define vs r0 vs_ps= 2 @@ -123,390 +123,392 @@ vs_lc= 0 #if !defined(PARAMETER_STYLE) /*{*/ #define PARAMETER_STYLE 1 // Possible choices: -// 1 /* 0 bytes; 1-byte encoding of pb,lp,lc [default] */ -// 2 /* -24 bytes; 2-byte encoding requires no division */ -// 3 /* -32 bytes; separate bytes lc,lp,pb,xx at -4+ probs */ +// 1 /* 0 bytes; 1-byte encoding of pb,lp,lc [default] */ +// 2 /* -24 bytes; 2-byte encoding requires no division */ +// 3 /* -32 bytes; separate bytes lc,lp,pb,xx at -4+ probs */ #endif /*}*/ /* LzmaDecode(r0=vs, r1=inStream, r2=inSize, r3= &inSizeProcessed, - [sp,#0]= outStream, [sp,#4]= outSize, [sp,#8]= &outSizeProcessed) + [sp,#0]= outStream, [sp,#4]= outSize, [sp,#8]= &outSizeProcessed) */ LzmaDecode: .globl LzmaDecode - // save &inSizeProcesesed and caller registers - stmdb sp!,{r3, r4,r5,r6,r7, r8,r9,r10,r11, lr} - add r12,r2,r1 // inLim - ldr outPtr,[sp, #10*4 + 0*4] // outStream - ldr r7,[sp, #10*4 + 1*4] // outSize - mov inPtr,r1 - add r10,r7,outPtr // outLim + // save &inSizeProcesesed and caller registers + stmdb sp!,{r3, r4,r5,r6,r7, r8,r9,r10,r11, lr} + add r12,r2,r1 // inLim + ldr outPtr,[sp, #10*4 + 0*4] // outStream + ldr r7,[sp, #10*4 + 1*4] // outSize + mov inPtr,r1 + add r10,r7,outPtr // outLim #if 1==PARAMETER_STYLE /*{ [0]: pb*45 + lp*9 + lc */ - mov p,vs - ldrb r6,[inPtr],#1 + mov p,vs + ldrb r6,[inPtr],#1 - mov r4,#0 // r4= pb; + mov r4,#0 // r4= pb; 1: - cmp r6,#45; subhs r6,r6,#45; addhs r4,r4,#1; bhs 1b + cmp r6,#45; subhs r6,r6,#45; addhs r4,r4,#1; bhs 1b - mov r5,#0 // r5= lp; + mov r5,#0 // r5= lp; 1: - cmp r6,#9; subhs r6,r6, #9; addhs r5,r5,#1; bhs 1b // r6= lc; + cmp r6,#9; subhs r6,r6, #9; addhs r5,r5,#1; bhs 1b // r6= lc; #endif /*}*/ #if 2==PARAMETER_STYLE /*{ [0]: ((lc + lp)<<3) | pb; [1]: (lp<<4) | lc */ - mov p,vs - ldrb r4,[inPtr],#1; and r4,r4,#7 // pb - ldrb r6,[inPtr],#1; mov r5,r6,lsr #4 // lp - and r6,r6,#0xf // lc + mov p,vs + ldrb r4,[inPtr],#1; and r4,r4,#7 // pb + ldrb r6,[inPtr],#1; mov r5,r6,lsr #4 // lp + and r6,r6,#0xf // lc #endif /*}*/ #if 3==PARAMETER_STYLE /*{ lc,lp,pb,xx in separate bytes before probs[] */ - add p,vs,#4 - ldrb r6,[vs, #vs_lc] - ldrb r5,[vs, #vs_lp] - ldrb r4,[vs, #vs_ps] + add p,vs,#4 + ldrb r6,[vs, #vs_lc] + ldrb r5,[vs, #vs_lp] + ldrb r4,[vs, #vs_ps] #endif /*}*/ #undef vs - add r14,r5,r6 // lp + lc - mvn Range,#0 // ~0 - mvn r5,Range,lsl r5 // ~(~0<>1) - orr r0,r0,#(kBitModelTotal>>1)<<16 + mov r2,#LZMA_LIT_SIZE + mov r2,r2, lsl r14 // LZMA_LIT_SIZE << (lp + lc) + add r2,r2,#lo8(LZMA_BASE_SIZE) + add r2,r2,#hi8(LZMA_BASE_SIZE) // nProbs + mov r1,p + mov r0, #(kBitModelTotal>>1) + orr r0,r0,#(kBitModelTotal>>1)<<16 L10: - str r0,[r1],#4 - subs r2,r2,#2; bgt L10 + str r0,[r1],#4 + subs r2,r2,#2; bgt L10 - add r0,inPtr,#5 // sentinel + add r0,inPtr,#5 // sentinel L14: - call rcInit2; cmp r0,inPtr; bne L14 + call rcInit2; cmp r0,inPtr; bne L14 L200: // main loop - ldr t0,outBuf - ldr t1,posStateMask - sub nowPos,outPtr,t0 - and mi,nowPos,t1 - str mi,posState - add mi,mi,state, lsl #kNumPosBitsMax - add p_in,p,#IsMatch<<1 - call rcGetBit_mi0; bne L270 + ldr t0,outBuf + ldr t1,posStateMask + sub nowPos,outPtr,t0 + and mi,nowPos,t1 + str mi,posState + add mi,mi,state, lsl #kNumPosBitsMax + add p_in,p,#IsMatch<<1 + call rcGetBit_mi0; bne L270 - ldr t0,litPosMask - ldr t1,lc - and t0,t0,nowPos - ldrb mi,prevB - mov t0,t0,lsl t1 // (nowPos & litPosMask)<> (8- lc)) - add prob, p,#lo8(Literal<<1) - add t0,t0,t0,lsl #1 // *3 - add prob,prob,#hi8(Literal<<1) - mov symbol,#1 - add prob,prob,t0,lsl #1+ 8 // *768 *2 + ldr t0,litPosMask + ldr t1,lc + and t0,t0,nowPos + ldrb mi,prevB + mov t0,t0,lsl t1 // (nowPos & litPosMask)<> (8- lc)) + add prob, p,#lo8(Literal<<1) + add t0,t0,t0,lsl #1 // *3 + add prob,prob,#hi8(Literal<<1) + mov symbol,#1 + add prob,prob,t0,lsl #1+ 8 // *768 *2 - cmp state,#kNumLitStates; blo L240 + cmp state,#kNumLitStates; blo L240 L205: - ldr r_rep0,rep0 - ldrb matchB,[outPtr, -r_rep0] + ldr r_rep0,rep0 + ldrb matchB,[outPtr, -r_rep0] L210: // symbol === mi === mo - mov matchB,matchB,lsl #1 - add p_in,prob,#0x100<<1 - and bit,matchB,#0x100 - add p_in,p_in,bit,lsl #1 - call rcGetBit_mi - and t0,symbol,#1 - cmp t0,bit,lsr #8; bne L243 // break - cmp symbol,#0x100; blo L210 - b L245 + mov matchB,matchB,lsl #1 + add p_in,prob,#0x100<<1 + and bit,matchB,#0x100 + add p_in,p_in,bit,lsl #1 + call rcGetBit_mi + and t0,symbol,#1 + cmp t0,bit,lsr #8; bne L243 // break + cmp symbol,#0x100; blo L210 + b L245 L240: // symbol === mi === mo - mov p_in,prob - call rcGetBit_mi + mov p_in,prob + call rcGetBit_mi L243: - cmp symbol,#0x100; blo L240 + cmp symbol,#0x100; blo L240 L245: - mov t0,#6 - cmp state,#10; movlo t0,#3 - cmp state,# 4; movlo t0,state - sub state,state,t0 - b L298 // assumes symbol===r0 + mov t0,#6 + cmp state,#10; movlo t0,#3 + cmp state,# 4; movlo t0,state + sub state,state,t0 + b L298 // assumes symbol===r0 L270: - add p_in,p,#IsRep<<1 - call rcGetBit_state0; bne L290 - ldr t0,rep2 - ldr t1,rep1 - ldr r_rep0,rep0 - str t0,rep3 - str t1,rep2 - str r_rep0,rep1 - cmp state,#kNumLitStates - mov state,#3 - movlo state,#0 - add prob, p,#lo8(LenCoder<<1) - add prob,prob,#hi8(LenCoder<<1) - b L350 + add p_in,p,#IsRep<<1 + call rcGetBit_state0; bne L290 + ldr t0,rep2 + ldr t1,rep1 + ldr r_rep0,rep0 + str t0,rep3 + str t1,rep2 + str r_rep0,rep1 + cmp state,#kNumLitStates + mov state,#3 + movlo state,#0 + add prob, p,#lo8(LenCoder<<1) + add prob,prob,#hi8(LenCoder<<1) + b L350 L290: - add p_in,p,#IsRepG0<<1 - call rcGetBit_state0; bne L300 + add p_in,p,#IsRepG0<<1 + call rcGetBit_state0; bne L300 L293: - ldr t0,posState - add p_in,p,#IsRep0Long<<1 - add mi,t0,state,lsl #kNumPosBitsMax - call rcGetBit_mi0; bne L340 + ldr t0,posState + add p_in,p,#IsRep0Long<<1 + add mi,t0,state,lsl #kNumPosBitsMax + call rcGetBit_mi0; bne L340 L295: - cmp state,#kNumLitStates - mov state,#11 - movlo state,#9 + cmp state,#kNumLitStates + mov state,#11 + movlo state,#9 L297: - ldr t0,outBuf - ldr r_rep0,rep0 - sub nowPos,outPtr,t0 - cmp nowPos,r_rep0; blo lzmaDataError - ldrb r0,[outPtr, -r_rep0] + ldr t0,outBuf + ldr r_rep0,rep0 + sub nowPos,outPtr,t0 + cmp nowPos,r_rep0; blo lzmaDataError + ldrb r0,[outPtr, -r_rep0] L298: - strb r0,[outPtr],#1 - b L519 + strb r0,[outPtr],#1 + b L519 L300: - add p_in,p,#IsRepG1<<1 - call rcGetBit_state0; ldr distance,rep1; beq L330 + add p_in,p,#IsRepG1<<1 + call rcGetBit_state0; ldr distance,rep1; beq L330 L310: - add p_in,p,#IsRepG2<<1 - call rcGetBit_state0; ldr distance,rep2; beq L325 + add p_in,p,#IsRepG2<<1 + call rcGetBit_state0; ldr distance,rep2; beq L325 L320: - ldr t0,rep2 - ldr distance,rep3 - str t0,rep3 + ldr t0,rep2 + ldr distance,rep3 + str t0,rep3 L325: - ldr t0,rep1 - str t0,rep2 + ldr t0,rep1 + str t0,rep2 L330: - ldr r_rep0,rep0 - str distance,rep0 - str r_rep0,rep1 + ldr r_rep0,rep0 + str distance,rep0 + str r_rep0,rep1 L340: - cmp state,#kNumLitStates - mov state,#11 - movlo state,#8 - add prob, p,#lo8(RepLenCoder<<1) - add prob,prob,#hi8(RepLenCoder<<1) + cmp state,#kNumLitStates + mov state,#11 + movlo state,#8 + add prob, p,#lo8(RepLenCoder<<1) + add prob,prob,#hi8(RepLenCoder<<1) L350: - add p_in,prob,#LenChoice<<1 - call rcGetBit_0; bne L360 - ldr t0,posState - add probLen,prob,#LenLow<<1 - mov t1,#0 - add probLen,probLen,t0,lsl #1+ kLenNumLowBits - mov hibit,#1<=Range) Code-=Range; - adc r_rep0,r_rep0,r_rep0 // r_rep0 = (r_rep0<<1) + (Code>=Range) + call rcNormalize + mov Range,Range,lsr #1 + cmp Code,Range + subhs Code,Code,Range // if (Code>=Range) Code-=Range; + adc r_rep0,r_rep0,r_rep0 // r_rep0 = (r_rep0<<1) + (Code>=Range) L430: - subs numDirectBits,numDirectBits,#1; bne L420 - add prob,p, #lo8(Align<<1) - add prob,prob,#hi8(Align<<1) - mov r_rep0,r_rep0,lsl #kNumAlignBits - mov numDirectBits,#kNumAlignBits + subs numDirectBits,numDirectBits,#1; bne L420 + add prob,p, #lo8(Align<<1) + add prob,prob,#hi8(Align<<1) + mov r_rep0,r_rep0,lsl #kNumAlignBits + mov numDirectBits,#kNumAlignBits L438: - str r_rep0,rep0 - mov i,#1 - mov mi,#1 + str r_rep0,rep0 + mov i,#1 + mov mi,#1 L440: - mov p_in,prob; call rcGetBit_mi - tst mo,#1; beq L445 - ldr r_rep0,rep0 - orr r_rep0,r_rep0,i - str r_rep0,rep0 + mov p_in,prob; call rcGetBit_mi + tst mo,#1; beq L445 + ldr r_rep0,rep0 + orr r_rep0,r_rep0,i + str r_rep0,rep0 L445: - mov i,i,lsl #1 - subs numDirectBits,numDirectBits,#1; bne L440 - ldr r_rep0,rep0 - b L465 + mov i,i,lsl #1 + subs numDirectBits,numDirectBits,#1; bne L440 + ldr r_rep0,rep0 + b L465 L450: L460: - ldr r_rep0,posSlot + ldr r_rep0,posSlot L465: - adds r_rep0,r_rep0,#1 + adds r_rep0,r_rep0,#1 #if 0 /*{ only for the stream version */ - bne L470 - mov t0,#kLzmaStreamWasFinishedId - str t0,m_len - b L530 + bne L470 + mov t0,#kLzmaStreamWasFinishedId + str t0,m_len + b L530 L470: #endif /*}*/ - str r_rep0,rep0 - ldr r_len,m_len + str r_rep0,rep0 + ldr r_len,m_len L500: - ldr t0,outBuf - add r_len,r_len,#kMatchMinLen - sub t0,outPtr,t0 // nowPos - cmp r_rep0,t0; bhi lzmaDataError - ldr t1,outLim + ldr t0,outBuf + add r_len,r_len,#kMatchMinLen + sub t0,outPtr,t0 // nowPos + cmp r_rep0,t0; bhi lzmaDataError + ldr t1,outLim L510: - ldrb r0,[outPtr, -r_rep0] - strb r0,[outPtr],#1 - cmp outPtr,t1; bhs L530 - subs r_len,r_len,#1; bne L510 + ldrb r0,[outPtr, -r_rep0] + strb r0,[outPtr],#1 + cmp outPtr,t1; bhs L530 + subs r_len,r_len,#1; bne L510 #if HAS_ARM_PLD /*{*/ - pld [outPtr,#32] // fetch next cache line + pld [outPtr,#32] // fetch next cache line #endif /*}*/ L519: - strb r0,prevB // implicit &0xFF + strb r0,prevB // implicit &0xFF L520: // bottom of while loop - ldr t1,outLim - cmp outPtr,t1; blo L200 + ldr t1,outLim + cmp outPtr,t1; blo L200 L530: - call rcNormalize - mov r0,#0 // success + call rcNormalize + mov r0,#0 // success lzmaExit: - ldr t1,inBuf - sub t0,inPtr,t1 - ldr t1,inSizeProcessed - str t0,[t1] + ldr t1,inBuf + sub t0,inPtr,t1 + ldr t1,inSizeProcessed + str t0,[t1] - ldr t1,outBuf - sub t0,outPtr,t1 - ldr t1,outSizeProcessed - str t0,[t1] + ldr t1,outBuf + sub t0,outPtr,t1 + ldr t1,outSizeProcessed + str t0,[t1] - add sp,sp,#(14+1)*4 - ldmia sp!,{r4,r5,r6,r7, r8,r9,r10,r11, pc} + add sp,sp,#(14+1)*4 + ldmia sp!,{r4,r5,r6,r7, r8,r9,r10,r11, pc} lzmaDataError: - mov r0,#1 // failure - b lzmaExit + mov r0,#1 // failure + b lzmaExit rcNormalize: - cmp Range,#kTopValue - movhs pc,lr + cmp Range,#kTopValue + movhs pc,lr rcLoad: - ldr t0,inLim - mov Range,Range, lsl #8 - cmp t0,inPtr - beq lzmaDataError + ldr t0,inLim + mov Range,Range, lsl #8 + cmp t0,inPtr + beq lzmaDataError rcInit2: - ldrb t0,[inPtr],#1 - orr Code,t0,Code, lsl #8 - mov pc,lr + ldrb t0,[inPtr],#1 + orr Code,t0,Code, lsl #8 + mov pc,lr rcGetBit_state0: // rcGetBit(0, state + p_in) - mov mi,state + mov mi,state rcGetBit_mi0: // rcGetBit(0, mi + p_in) - add p_in,p_in,mi, lsl #1 + add p_in,p_in,mi, lsl #1 rcGetBit_0: // rcGetBit(0, p_in) - mov mi,#0 + mov mi,#0 rcGetBit_mi: // rcGetBit(mi, mi + p_in) - add p_in,p_in,mi, lsl #1 + add p_in,p_in,mi, lsl #1 rcGetBit: // Out: CC set on mo - cmp Range,#kTopValue - mov t1,lr - bllo rcLoad // conditional subroutine call + cmp Range,#kTopValue + mov t1,lr + bllo rcLoad // conditional subroutine call #define starp t0 #define bound lr #define y0tmp lr - ldrh starp,[p_in] - mov y0tmp,Range, lsr #kNumBitModelTotalBits - mul bound,starp,y0tmp - cmp Code,bound - movlo Range, bound - subhs Range,Range,bound - rsblo y0tmp,starp,#kBitModelTotal - subhs Code, Code, bound - addlo starp,starp,y0tmp, lsr #kNumMoveBits - subhs starp,starp,starp, lsr #kNumMoveBits - adcs mo,mi,mi // mo = (mi<<1) | (Code >= bound); set CC - strh starp,[p_in] + ldrh starp,[p_in] + mov y0tmp,Range, lsr #kNumBitModelTotalBits + mul bound,starp,y0tmp + cmp Code,bound + movlo Range, bound + subhs Range,Range,bound + rsblo y0tmp,starp,#kBitModelTotal + subhs Code, Code, bound + addlo starp,starp,y0tmp, lsr #kNumMoveBits + subhs starp,starp,starp, lsr #kNumMoveBits + adcs mo,mi,mi // mo = (mi<<1) | (Code >= bound); set CC + strh starp,[p_in] #undef y0tmp #undef bound #undef starp - mov pc,t1 + mov pc,t1 + +// vi:ts=8:et