Passes first tests! The only untested feature is a large offset, which forces the
minimum match length to be >= 3. Files: arm_nrv2b_d32.S, arm_nrv2e_d32.S. committer: jreiser <jreiser> 1140677145 +0000
This commit is contained in:
parent
f3b8c9f34e
commit
20e4cbfd13
@ -1,4 +1,4 @@
|
||||
/* arm_nrv2b_d32.S -- ARM decompressor for NRV2E
|
||||
/* arm_nrv2b_d32.S -- ARM decompressor for NRV2B
|
||||
|
||||
This file is part of the UPX executable compressor.
|
||||
|
||||
@ -35,79 +35,93 @@
|
||||
#define tmp r3
|
||||
#define bits r4
|
||||
#define off r5
|
||||
#define lr2 r6
|
||||
#define g32 r7
|
||||
#define g32 r6
|
||||
#define wrnk r7 /* 0xd00 M2_MAX_OFFSET before "wrinkle" */
|
||||
|
||||
ucl_nrv2b_decompress_32: .globl ucl_nrv2b_decompress_32
|
||||
@ ARM mode (char *src, int len_src, char *dst, int *plen_dst)
|
||||
add r1,len,src @ r1= eof_src;
|
||||
stmfd sp!,{r1,r2,r3, r4,r5,r6,r7,lr}
|
||||
mov bits,#1<<31
|
||||
blx hitch_n2b
|
||||
hitch_n2b_r:
|
||||
ldmfd sp!,{r4,r5,r6,r7,pc}
|
||||
|
||||
get32: @ ARM mode; In: Carry set (unchanged until final adc)
|
||||
ldrb bits,[src],#1
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #1*8
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #2*8
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #3*8
|
||||
adc bits,bits,bits
|
||||
bx lr
|
||||
unhitch_n2b:
|
||||
/* r12 ("ip") is assumed to be a scratch register. */
|
||||
|
||||
#define GETBIT \
|
||||
add bits,bits; beq 1f; 0: \
|
||||
.subsection 1; \
|
||||
1: blx getb; b 0b; \
|
||||
1: blx g32; b 0b; \
|
||||
.subsection 0
|
||||
|
||||
#define getnextb(reg) GETBIT; adc reg,reg
|
||||
#define jnextb0 GETBIT; bcc
|
||||
#define jnextb1 GETBIT; bcs
|
||||
|
||||
.code 16 @ THUMB mode
|
||||
ucl_nrv2b_decompress_32: .globl ucl_nrv2b_decompress_32 @ ARM mode
|
||||
.type ucl_nrv2b_decompress_32, %function
|
||||
/* error = (*)(char const *src, int len_src, char *dst, int *plen_dst) */
|
||||
adr r12,1+thumb_nrv2b_d32; bx r12 @ enter THUMB mode
|
||||
.code 16 @ THUMB mode
|
||||
/*
 * thumb_nrv2b_d32: Thumb-state prologue of the NRV2B decompressor.
 * Reached from the ARM-mode entry point ucl_nrv2b_decompress_32 through
 * "adr r12,1+thumb_nrv2b_d32; bx r12".
 * Sets up the register state used by the decode loop, then jumps to top_n2b:
 *   r1   = src + len (end of the source buffer)
 *   off  = -1
 *   bits = 1<<31
 *   wrnk = 0xd00
 *   g32  = address of the ARM-mode refill routine get32
 */
thumb_nrv2b_d32:
|
||||
add r1,len,src @ r1= eof_src;
|
||||
@ The first three words pushed (r1,r2,r3) are popped again at eof_n2b as
@ eof_src, orig_dst and plen_dst; r4-r7 and lr are restored on return.
push {r1,r2,r3, r4,r5,r6,r7, lr}
|
||||
mov bits,#1; neg off,bits @ off= -1 initial condition
|
||||
@ With only bit 31 set, the first "add bits,bits" in GETBIT yields zero
@ with Carry set, so the very first bit request goes to the refill path.
lsl bits,#31 @ 1<<31: refill next time
|
||||
@ wrnk is built in two steps (0xd, then shifted left by 8) and is later
@ compared against off with "cmn off,wrnk".
mov wrnk,#0xd
|
||||
lsl wrnk,#8 @ 0xd00
|
||||
@ g32 is the target of the "blx g32" refill call in GETBIT.
adr g32,get32 @ load pc-relative address
|
||||
b top_n2b
|
||||
|
||||
eof_n2b:
|
||||
pop {r1,r3,r4} @ r1= eof_src; r3= orig_dst; r4= plen_dst
|
||||
sub src,r1 @ 0 if src length OK
|
||||
sub dst,r3 @ actual dst length
|
||||
str dst,[r4]
|
||||
sub g32,#get32 - hitch_n2b_r @ g32= &hitch_n2b_r
|
||||
bx g32
|
||||
hitch_n2b:
|
||||
mov g32,lr
|
||||
add g32,#get32 - hitch_n2b_r @ g32= &get32
|
||||
b top
|
||||
pop {r1,r3,r4} @ r1= eof_src; r3= orig_dst; r4= plen_dst
|
||||
sub src,r1 @ 0 if actual src length equals expected length
|
||||
sub dst,r3 @ actual dst length
|
||||
str dst,[r4]
|
||||
pop {r4,r5,r6,r7, pc} @ return
|
||||
|
||||
lit_n2b:
|
||||
ldrb tmp,[src]; add src,#1
|
||||
strb tmp,[dst]; add dst,#1
|
||||
ldrb tmp,[src]; add src,#1
|
||||
strb tmp,[dst]; add dst,#1
|
||||
top_n2b:
|
||||
jnextb1 lit_n2b
|
||||
jnextb1 lit_n2b
|
||||
|
||||
bl ss11 @ len= offset
|
||||
sub len,#3; bcs off_same
|
||||
ldrb off,[src]; add src,#1 @ low 8 bits
|
||||
lsl len,#8
|
||||
orr off,len
|
||||
mvn off,off; beq eof_n2b @ off= ~off
|
||||
off_same:
|
||||
bl ss11 @ len= count -2
|
||||
mov tmp,#0xd; lsl tmp,#8
|
||||
cmn tmp,len
|
||||
mov tmp,#2 @ does not modify Carry
|
||||
adc len,tmp @ len += 2+ (0xd00<=offset)
|
||||
ldrb tmp,[dst] @ force cacheline allocate
|
||||
copy_n2v:
|
||||
ldrb tmp,[dst,off]
|
||||
strb tmp,[dst]; add dst,#1
|
||||
sub len,#1; bne copy_n2b
|
||||
b top_n2b
|
||||
bl ss11 @ len= encoded offset [2..)
|
||||
sub tmp,len,#3 @ set Carry
|
||||
mov len,#0 @ Carry unaffected
|
||||
blo offprev_n2b @ ss11 returned 2
|
||||
lsl tmp,#8
|
||||
ldrb off,[src]; add src,#1 @ low 8 bits
|
||||
orr off,tmp
|
||||
mvn off,off; beq eof_n2b @ off= ~off
|
||||
offprev_n2b: @ In: 0==len
|
||||
getnextb(len); getnextb(len); bne plus1_n2b @ two bits; 1,2,3 ==> 2,3,4
|
||||
bl ss11 @ len= encoded length [2..)
|
||||
add len,#2 @ [2..) ==> [4..);
|
||||
plus1_n2b:
|
||||
add len,#1 @ 1,2,3 ==> 2,3,4; [4..) ==> [5..)
|
||||
/* 'cmn': add the inputs, set condition codes, discard the sum */
|
||||
cmn off,wrnk; bcs near_n2b @ within M2_MAX_OFFSET
|
||||
add len,#1 @ too far away, so minimum match length is 3
|
||||
near_n2b:
|
||||
ldrb tmp,[dst] @ force cacheline allocate
|
||||
/*
 * copy_n2b: match copy loop.
 * Copies len bytes, one at a time, from address dst+off to dst, advancing
 * dst after each byte, then resumes decoding at top_n2b.
 * Each byte is stored before the next one is loaded, so the source range
 * may overlap bytes written earlier in this same loop.
 * NOTE(review): off is produced by "mvn off,off" (or starts at -1), so it
 * is presumably always negative here; len is assumed to be >= 1 on entry,
 * since a zero len would wrap around in the countdown. Confirm.
 */
copy_n2b:
|
||||
ldrb tmp,[dst,off]
|
||||
strb tmp,[dst]; add dst,#1
|
||||
@ Loop until the countdown reaches zero (bne tests the result of the sub).
sub len,#1; bne copy_n2b
|
||||
b top_n2b
|
||||
|
||||
ss11:
|
||||
mov len,#1 @ the msb
|
||||
mov lr2,lr @ save return address
|
||||
mov len,#1 @ the msb
|
||||
mov r12,lr @ return address; alternate: "push {lr}"
|
||||
ss11a:
|
||||
getnextb(len)
|
||||
jnextb0 ss11a
|
||||
mov pc,lr2 @ return
|
||||
getnextb(len)
|
||||
jnextb0 ss11a
|
||||
mov pc,r12 @ return; alternate: "pop {pc}"
|
||||
|
||||
.code 32 @ ARM mode for ease of Carry manipulation
|
||||
/*
 * get32: refill the bit buffer "bits" with the next 32 bits of input.
 * Assembled as ARM code (see the ".code 32" directive just above) and
 * called from the Thumb decoder through "blx g32" in the GETBIT macro.
 *
 * In:   src   -> next unread source byte
 *       Carry = 1 (the loads and plain orr below do not alter the flags)
 * Out:  src   advanced by 4
 *       bits  = (32-bit little-endian word at old src) << 1, with the
 *               incoming Carry placed in bit 0
 *       Carry = bit 31 of the word that was loaded
 * Clobbers: tmp
 *
 * The 1 placed in bit 0 works together with GETBIT ("add bits,bits; beq"):
 * once that marker bit is shifted out, bits becomes zero and the next
 * refill is triggered.
 */
get32: @ In: Carry set (unchanged until final adcs)
|
||||
@ Four post-indexed byte loads assemble the word, lowest byte first.
ldrb bits,[src],#1
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #1*8
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #2*8
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #3*8
|
||||
@ bits = bits + bits + Carry: shift left by one, Carry-in enters bit 0,
@ and the old bit 31 leaves as the new Carry.
adcs bits,bits,bits @ Set Carry out
|
||||
bx lr
|
||||
|
||||
.size ucl_nrv2b_decompress_32, .-ucl_nrv2b_decompress_32
|
||||
/*
|
||||
vi:ts=8:et:nowrap
|
||||
*/
|
||||
|
||||
|
||||
@ -40,22 +40,6 @@
|
||||
|
||||
#define cnt r1 /* overlaps 'len' while reading an offset */
|
||||
|
||||
ucl_nrv2e_decompress_32: .globl ucl_nrv2e_decompress_32 @ ARM mode
|
||||
/* error = (*)(char const *src, int len_src, char *dst, int *plen_dst) */
|
||||
add r1,len,src @ r1= eof_src;
|
||||
stmfd sp!,{r1,r2,r3, r4,r5,r6,r7,lr}
|
||||
blx hitch_n2e
|
||||
hitch_n2e_r:
|
||||
ldmfd sp!,{r4,r5,r6,r7,pc}
|
||||
|
||||
get32: @ ARM mode; In: Carry set (unchanged until final adcs)
|
||||
ldrb bits,[src],#1
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #1*8
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #2*8
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #3*8
|
||||
adcs bits,bits,bits @ Set Carry out
|
||||
bx lr
|
||||
|
||||
#define GETBIT \
|
||||
add bits,bits; beq 1f; 0: \
|
||||
.subsection 1; \
|
||||
@ -66,27 +50,31 @@ get32: @ ARM mode; In: Carry set (unchanged until final adcs)
|
||||
#define jnextb0 GETBIT; bcc
|
||||
#define jnextb1 GETBIT; bcs
|
||||
|
||||
.code 16 @ THUMB mode
|
||||
eof_n2e:
|
||||
pop {r1,r3,r4} @ r1= eof_src; r3= orig_dst; r4= plen_dst
|
||||
sub src,r1 @ 0 if actual src length equals expected length
|
||||
sub dst,r3 @ actual dst length
|
||||
str dst,[r4]
|
||||
sub g32,#get32 - hitch_n2e_r @ g32= &hitch_n2e_r
|
||||
bx g32
|
||||
|
||||
hitch_n2e:
|
||||
mov g32,lr @ return address
|
||||
add g32,#get32 - hitch_n2e_r @ g32= &get32
|
||||
mov bits,#1; neg off,bits @ off= -1 initial condition
|
||||
ucl_nrv2e_decompress_32: .globl ucl_nrv2e_decompress_32 @ ARM mode
|
||||
.type ucl_nrv2e_decompress_32, %function
|
||||
/* error = (*)(char const *src, int len_src, char *dst, int *plen_dst) */
|
||||
adr r12,1+thumb_nrv2e_d32; bx r12 @ enter THUMB mode
|
||||
.code 16 @ THUMB mode
|
||||
thumb_nrv2e_d32:
|
||||
add r1,len,src @ r1= eof_src;
|
||||
push {r1,r2,r3, r4,r5,r6,r7, lr}
|
||||
mov bits,#1; neg off,bits @ off= -1 initial condition
|
||||
lsl bits,#31 @ 1<<31; refill next time
|
||||
mov wrnk,#5
|
||||
lsl wrnk,#8 @ 0x500
|
||||
b top_n2e
|
||||
adr g32,get32 @ load pc-relative address
|
||||
b top_n2e
|
||||
|
||||
eof_n2e:
|
||||
pop {r1,r3,r4} @ r1= eof_src; r3= orig_dst; r4= plen_dst
|
||||
sub src,r1 @ 0 if actual src length equals expected length
|
||||
sub dst,r3 @ actual dst length
|
||||
str dst,[r4]
|
||||
pop {r4,r5,r6,r7, pc} @ return
|
||||
|
||||
lit_n2e:
|
||||
ldrb tmp,[src]; add src,#1
|
||||
strb tmp,[dst]; add dst,#1
|
||||
ldrb tmp,[src]; add src,#1
|
||||
strb tmp,[dst]; add dst,#1
|
||||
top_n2e:
|
||||
jnextb1 lit_n2e
|
||||
mov cnt,#1; b getoff_n2e
|
||||
@ -101,11 +89,11 @@ getoff_n2e:
|
||||
sub tmp,cnt,#3 @ set Carry
|
||||
mov len,#0 @ Carry unaffected
|
||||
blo offprev_n2e @ cnt was 2; tests Carry only
|
||||
lsl off,tmp,#8
|
||||
ldrb tmp,[src]; add src,#1
|
||||
orr off,tmp
|
||||
mvn off,off; beq eof_n2e @ off= ~off
|
||||
asr off,#1; bcs lenlast_n2e
|
||||
lsl tmp,#8
|
||||
ldrb off,[src]; add src,#1 @ low 7+1 bits
|
||||
orr off,tmp
|
||||
mvn off,off; beq eof_n2e @ off= ~off
|
||||
asr off,#1; bcs lenlast_n2e
|
||||
b lenmore_n2e
|
||||
|
||||
offprev_n2e:
|
||||
@ -126,12 +114,23 @@ gotlen_n2e: @ 'cmn': add the inputs, set condition codes, discard the sum
|
||||
cmn off,wrnk; bcs near_n2e @ within M2_MAX_OFFSET
|
||||
add len,#1 @ too far away, so minimum match length is 3
|
||||
near_n2e:
|
||||
ldrb tmp,[dst] @ force cacheline allocate
|
||||
ldrb tmp,[dst] @ force cacheline allocate
|
||||
copy_n2e:
|
||||
ldrb tmp,[dst,off]
|
||||
strb tmp,[dst]; add dst,#1
|
||||
sub len,#1; bne copy_n2e
|
||||
b top_n2e
|
||||
ldrb tmp,[dst,off]
|
||||
strb tmp,[dst]; add dst,#1
|
||||
sub len,#1; bne copy_n2e
|
||||
b top_n2e
|
||||
|
||||
.code 32 @ ARM mode for ease of Carry manipulation
|
||||
/*
 * get32: refill the bit buffer "bits" with the next 32 bits of input.
 * Assembled as ARM code (see the ".code 32" directive just above).
 * NOTE(review): presumably called from the Thumb decoder through a
 * "blx g32" in the GETBIT macro, as g32 is loaded with this address by
 * "adr g32,get32"; the call site itself is not visible here. Confirm.
 *
 * In:   src   -> next unread source byte
 *       Carry = 1 (the loads and plain orr below do not alter the flags)
 * Out:  src   advanced by 4
 *       bits  = (32-bit little-endian word at old src) << 1, with the
 *               incoming Carry placed in bit 0
 *       Carry = bit 31 of the word that was loaded
 * Clobbers: tmp
 */
get32: @ In: Carry set (unchanged until final adcs)
|
||||
@ Four post-indexed byte loads assemble the word, lowest byte first.
ldrb bits,[src],#1
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #1*8
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #2*8
|
||||
ldrb tmp, [src],#1; orr bits,bits,tmp,lsl #3*8
|
||||
@ bits = bits + bits + Carry: shift left by one, Carry-in enters bit 0,
@ and the old bit 31 leaves as the new Carry.
adcs bits,bits,bits @ Set Carry out
|
||||
bx lr
|
||||
|
||||
.size ucl_nrv2e_decompress_32, .-ucl_nrv2e_decompress_32
|
||||
|
||||
/*
|
||||
vi:ts=8:et:nowrap
|
||||
|
||||
Loading…
Reference in New Issue
Block a user