From 536b2c2475716e70f3796bd2f034a71dc49f5db6 Mon Sep 17 00:00:00 2001 From: John Reiser Date: Tue, 30 Jul 2024 14:21:01 -0700 Subject: [PATCH] i386 LzmaDecode (lzma_d_c*.S) compiled code exited not at the end ... so the build recipe required acrobatics to make it "fall through" modified: stub/src/arch/i386/Makefile.extra new file: stub/src/arch/i386/fall-through.sed modified: stub/src/arch/i386/lzma-multi_d.S modified: stub/src/arch/i386/lzma_d_cf.S modified: stub/src/arch/i386/lzma_d_cs.S modified: stub/src/arm.v4a-linux.elf-entry.S modified: stub/src/i386-expand.S --- src/stub/src/arch/i386/Makefile.extra | 10 ++++++---- src/stub/src/arch/i386/fall-through.sed | 22 ++++++++++++++++++++++ src/stub/src/arch/i386/lzma-multi_d.S | 2 +- src/stub/src/arch/i386/lzma_d_cf.S | 6 +++--- src/stub/src/arch/i386/lzma_d_cs.S | 6 +++--- src/stub/src/arm.v4a-linux.elf-entry.S | 5 +++++ src/stub/src/i386-expand.S | 14 +++++++++----- 7 files changed, 49 insertions(+), 16 deletions(-) create mode 100644 src/stub/src/arch/i386/fall-through.sed diff --git a/src/stub/src/arch/i386/Makefile.extra b/src/stub/src/arch/i386/Makefile.extra index 2aef9f3f..ab3bbf1e 100644 --- a/src/stub/src/arch/i386/Makefile.extra +++ b/src/stub/src/arch/i386/Makefile.extra @@ -42,12 +42,14 @@ $c += -I$(UPX_LZMADIR) $c += -I$(top_srcdir)/src lzma_d_c%.S : lzma_d_c.c - $(call tc,gcc) $(PP_FLAGS) -c $< -o tmp/$T.o + echo; echo TARGET=$@ PATH=$$PATH + $(call tc,gcc) $(PP_FLAGS) -S $< -o tmp/$T.s1 + sed -f fall-through.sed < tmp/$T.s1 > tmp/$T.s + $(call tc,gcc) -x assembler-with-cpp -c tmp/$T.s -o tmp/$T.o $(call tc,f-objstrip,tmp/$T.o) $(call tc,objcopy) -O binary --only-section .text.LzmaDecode tmp/$T.o tmp/$T.bin - head -c-1 tmp/$T.bin > tmp/$T.out - $(call tc,objdump) -b binary -m i386 -D tmp/$T.out | $(RTRIM) > tmp/$T.out.disasm - $(call tc,bin2h) --mode=gas tmp/$T.out $@ + $(call tc,objdump) -b binary -m i386 -D tmp/$T.bin | $(RTRIM) > tmp/$T.bin.disasm + $(call tc,bin2h) --mode=gas tmp/$T.bin $@ lzma_d_cf.% : PP_FLAGS = -DFAST lzma_d_cs.% : PP_FLAGS = -DSMALL diff --git a/src/stub/src/arch/i386/fall-through.sed b/src/stub/src/arch/i386/fall-through.sed new file mode 100644 index 00000000..e7be7011 --- /dev/null +++ b/src/stub/src/arch/i386/fall-through.sed @@ -0,0 +1,22 @@ +# Take the compiler-generated assembly code for subroutine LzmaDecode(), +# and edit it to become a fall-through block. +# The exit epilog "pop %ebx; pop %esi; pop %edi; pop %ebp; ret" +# might appear twice, and in the middle of the code. +# Move it to the end, delete the 'ret', +# and 'jmp' to the moved epilog. +/popl\t%ebx/{ + h + c \ + jmp Exit_LzmaDecode +} +/popl\t%esi/,/popl\t%ebp/{ + H + d +} +/ret$/d +/\.size\tLzmaDecode, \.-LzmaDecode/{ + H + i \ +Exit_LzmaDecode: + g +} diff --git a/src/stub/src/arch/i386/lzma-multi_d.S b/src/stub/src/arch/i386/lzma-multi_d.S index c52531af..c77cc6a9 100644 --- a/src/stub/src/arch/i386/lzma-multi_d.S +++ b/src/stub/src/arch/i386/lzma-multi_d.S @@ -124,6 +124,7 @@ section LZMA_ELF00 #if 1 // alloca() via lea +// LZMA_DEC10 or LZMA_DEC20 initializes the Markov matrix (16-bit words) to 0x400 anyway. // CLzmaDecoderState:{*_bits, CProb[LZMA_BASE_SIZE + (LZMA_LIT_SIZE<<%cl)]} lea esp,[0 -4 - 2*LZMA_BASE_SIZE + 2*ebx + esp] and esp, (~0<<5) // 32-byte align @@ -178,7 +179,6 @@ section LZMA_DEC20 // // cleanup section LZMA_DEC30 - // These are needed only because followed by LEXEC015. LzmaDecode() has set // *O_OUTS, and returned %eax=0 for success, %eax=1 for failure. Therefore // "neg eax" would be enough (followed by clearing %ecx to agree with NRV.) diff --git a/src/stub/src/arch/i386/lzma_d_cf.S b/src/stub/src/arch/i386/lzma_d_cf.S index 6bedbe53..dad029d9 100644 --- a/src/stub/src/arch/i386/lzma_d_cf.S +++ b/src/stub/src/arch/i386/lzma_d_cf.S @@ -46,7 +46,7 @@ /* 0x02d0 */ .byte 124, 36, 52,255,255,255, 0,119, 12, 59,108, 36, 56,184, 1, 0 /* 0x02e0 */ .byte 0, 0,116, 32, 69, 43,108, 36,124,184, 0, 0, 0, 0,139,148 /* 0x02f0 */ .byte 36,132, 0, 0, 0,139, 92, 36, 92,139,140, 36,144, 0, 0, 0 -/* 0x0300 */ .byte 137, 42,137, 25,131,196,100, 91, 94, 95, 93,195,129,124, 36, 52 +/* 0x0300 */ .byte 137, 42,137, 25,131,196,100,233, 46, 8, 0, 0,129,124, 36, 52 /* 0x0310 */ .byte 255,255,255, 0,141, 52, 27,139, 68, 36, 20,141, 28, 6,119, 29 /* 0x0320 */ .byte 59,108, 36, 56, 15,132, 52, 6, 0, 0,193,100, 36, 48, 8,193 /* 0x0330 */ .byte 100, 36, 52, 8, 15,182, 69, 0, 69, 9, 68, 36, 48,139, 68, 36 @@ -148,7 +148,7 @@ /* 0x0930 */ .byte 36, 72,137, 68, 36, 72,137, 84, 36, 68,131,124, 36, 76, 6,184 /* 0x0940 */ .byte 0, 0, 0, 0,139, 92, 36, 96, 15,159,192,129,195,104, 10, 0 /* 0x0950 */ .byte 0,141, 68, 64, 8,137, 68, 36, 76,233, 9,251,255,255,131,196 -/* 0x0960 */ .byte 100,184, 1, 0, 0, 0, 91, 94, 95, 93,195,139,116, 36, 4, 70 +/* 0x0960 */ .byte 100,184, 1, 0, 0, 0,233,207, 1, 0, 0,139,116, 36, 4, 70 /* 0x0970 */ .byte 137,116, 36, 72, 15,132, 85,249,255,255,139, 76, 36, 12,139, 68 /* 0x0980 */ .byte 36, 92,131,193, 2, 57, 68, 36, 72,119,211,137,194,139,132, 36 /* 0x0990 */ .byte 136, 0, 0, 0, 43, 68, 36, 72, 3,148, 36,136, 0, 0, 0, 3 @@ -177,4 +177,4 @@ /* 0x0b00 */ .byte 15,182, 69, 0,193,227, 8, 9, 68, 36, 48, 69,233, 59,255,255 /* 0x0b10 */ .byte 255,137,222, 41, 68, 36, 48, 41,198,137,208,102,193,232, 5,139 /* 0x0b20 */ .byte 76, 36, 64,102, 41,194,139, 68, 36, 60,102,137,151,200, 1, 0 -/* 0x0b30 */ .byte 0,137, 76, 36, 60,233, 72,255,255 +/* 0x0b30 */ .byte 0,137, 76, 36, 60,233, 72,255,255,255, 91, 94, 95, 93 diff --git a/src/stub/src/arch/i386/lzma_d_cs.S b/src/stub/src/arch/i386/lzma_d_cs.S index 6bedbe53..dad029d9 100644 --- a/src/stub/src/arch/i386/lzma_d_cs.S +++ b/src/stub/src/arch/i386/lzma_d_cs.S @@ -46,7 +46,7 @@ /* 0x02d0 */ .byte 124, 36, 52,255,255,255, 0,119, 12, 59,108, 36, 56,184, 1, 0 /* 0x02e0 */ .byte 0, 0,116, 32, 69, 43,108, 36,124,184, 0, 0, 0, 0,139,148 /* 0x02f0 */ .byte 36,132, 0, 0, 0,139, 92, 36, 92,139,140, 36,144, 0, 0, 0 -/* 0x0300 */ .byte 137, 42,137, 25,131,196,100, 91, 94, 95, 93,195,129,124, 36, 52 +/* 0x0300 */ .byte 137, 42,137, 25,131,196,100,233, 46, 8, 0, 0,129,124, 36, 52 /* 0x0310 */ .byte 255,255,255, 0,141, 52, 27,139, 68, 36, 20,141, 28, 6,119, 29 /* 0x0320 */ .byte 59,108, 36, 56, 15,132, 52, 6, 0, 0,193,100, 36, 48, 8,193 /* 0x0330 */ .byte 100, 36, 52, 8, 15,182, 69, 0, 69, 9, 68, 36, 48,139, 68, 36 @@ -148,7 +148,7 @@ /* 0x0930 */ .byte 36, 72,137, 68, 36, 72,137, 84, 36, 68,131,124, 36, 76, 6,184 /* 0x0940 */ .byte 0, 0, 0, 0,139, 92, 36, 96, 15,159,192,129,195,104, 10, 0 /* 0x0950 */ .byte 0,141, 68, 64, 8,137, 68, 36, 76,233, 9,251,255,255,131,196 -/* 0x0960 */ .byte 100,184, 1, 0, 0, 0, 91, 94, 95, 93,195,139,116, 36, 4, 70 +/* 0x0960 */ .byte 100,184, 1, 0, 0, 0,233,207, 1, 0, 0,139,116, 36, 4, 70 /* 0x0970 */ .byte 137,116, 36, 72, 15,132, 85,249,255,255,139, 76, 36, 12,139, 68 /* 0x0980 */ .byte 36, 92,131,193, 2, 57, 68, 36, 72,119,211,137,194,139,132, 36 /* 0x0990 */ .byte 136, 0, 0, 0, 43, 68, 36, 72, 3,148, 36,136, 0, 0, 0, 3 @@ -177,4 +177,4 @@ /* 0x0b00 */ .byte 15,182, 69, 0,193,227, 8, 9, 68, 36, 48, 69,233, 59,255,255 /* 0x0b10 */ .byte 255,137,222, 41, 68, 36, 48, 41,198,137,208,102,193,232, 5,139 /* 0x0b20 */ .byte 76, 36, 64,102, 41,194,139, 68, 36, 60,102,137,151,200, 1, 0 -/* 0x0b30 */ .byte 0,137, 76, 36, 60,233, 72,255,255 +/* 0x0b30 */ .byte 0,137, 76, 36, 60,233, 72,255,255,255, 91, 94, 95, 93 diff --git a/src/stub/src/arm.v4a-linux.elf-entry.S b/src/stub/src/arm.v4a-linux.elf-entry.S index 596a8e2f..a9230a7a 100644 --- a/src/stub/src/arm.v4a-linux.elf-entry.S +++ b/src/stub/src/arm.v4a-linux.elf-entry.S @@ -378,6 +378,11 @@ mempcpy: .globl mempcpy // (dst, src, n) .globl unlink; unlink: do_sys __NR_unlink; ret .globl write; write: do_sys __NR_write; ret + .globl my_bkpt +my_bkpt: + bkpt // my_bkpt + ret + // __NR_oldmmap gets ENOSYS! Must use __NR_mmap2 with all args in registers // Called from C (5th and 6th arg on stack), so must preserve r4 and r5 mmap: .globl mmap diff --git a/src/stub/src/i386-expand.S b/src/stub/src/i386-expand.S index c6c780dc..99fec069 100644 --- a/src/stub/src/i386-expand.S +++ b/src/stub/src/i386-expand.S @@ -208,22 +208,26 @@ not_nrv2b: #undef displ #undef dispq -#undef src -#undef dst - section LZMA_DAISY cmp $M_LZMA,meth; jne not_lzma + push (sz_cpr - sz_binfo)(src) // MATCH_90 inSize +#define O_INS (0*NBPW) +#define O_OUTS (4*NBPW) + #undef meth +#undef src +#undef dst // lzma code is written in intel syntax! ///* lzma has its own 'section's */ .intel_syntax noprefix -#include "arch/i386/lzma_d.S" +#include "arch/i386/lzma-multi_d.S" .att_syntax section LZMA_DEC30 // REPLACES LZMA_DEC30 from src/arch/i386/lzma_d.S //eof_lzma: already has set retval and *dstlen - pop %ebp // MARCH_54 + pop %edx // MATCH_90 toss inSize + pop %ebp // MATCH_54 add $3*NBPW,%esp // MATCH_53, MATCH_52, MATCH_51 ret