From df9db96bd1c013c07da1d7ec740021d588ab2815 Mon Sep 17 00:00:00 2001 From: John Reiser Date: Thu, 18 Jan 2001 05:45:25 +0000 Subject: [PATCH] filters for unix(linux) Modified Files: filteri.cpp filter/ctojr.h linker.cpp linker.h mem.cpp mem.h p_lx_elf.cpp p_lx_elf.h p_lx_exc.cpp p_lx_exc.h p_lx_sh.cpp p_lx_sh.h p_unix.cpp packer.cpp packer.h packhead.cpp stub/Makefile stub/macros.ash stub/l_lx_elf.c stub/l_lx_elf86.asm stub/l_lx_elf86.lds stub/l_lx_exec.c stub/l_lx_exec86.asm stub/l_lx_exec86.lds stub/l_lx_sh86.asm stub/l_lx_sh86.lds Added Files: filter/ctok.h stub/fold_elf86.asm stub/fold_exec86.asm stub/fold_sh86.asm committer: jreiser 979796725 +0000 --- src/filter/ctojr.h | 188 ++++++++++++++------- src/filter/ctok.h | 166 ++++++++++++++++++ src/filteri.cpp | 54 ++++-- src/linker.cpp | 6 +- src/linker.h | 3 +- src/mem.cpp | 41 +++++ src/mem.h | 9 + src/p_lx_elf.cpp | 222 +++++++++++------------- src/p_lx_elf.h | 7 +- src/p_lx_exc.cpp | 270 +++++++++++++++++++++-------- src/p_lx_exc.h | 20 ++- src/p_lx_sh.cpp | 97 ++++------- src/p_lx_sh.h | 6 +- src/p_unix.cpp | 66 +++++--- src/packer.cpp | 302 ++++++++++++++++++++++++++++++--- src/packer.h | 7 +- src/packhead.cpp | 8 +- src/stub/Makefile | 130 +++++++------- src/stub/fold_elf86.asm | 127 ++++++++++++++ src/stub/fold_exec86.asm | 46 +++++ src/stub/fold_sh86.asm | 134 +++++++++++++++ src/stub/l_lx_elf.c | 22 ++- src/stub/l_lx_elf86.asm | 348 +++----------------------------------- src/stub/l_lx_elf86.lds | 2 +- src/stub/l_lx_exec.c | 172 +++++++++---------- src/stub/l_lx_exec86.asm | 147 +++++++--------- src/stub/l_lx_exec86.lds | 2 +- src/stub/l_lx_sh86.asm | 149 ++-------------- src/stub/l_lx_sh86.lds | 13 +- src/stub/macros.ash | 355 +++++++++++++++++++++++++++++++++++++++ 30 files changed, 2014 insertions(+), 1105 deletions(-) create mode 100644 src/filter/ctok.h create mode 100644 src/stub/fold_elf86.asm create mode 100644 src/stub/fold_exec86.asm create mode 100644 src/stub/fold_sh86.asm diff --git a/src/filter/ctojr.h b/src/filter/ctojr.h index 61e2a6b9..af03de3d 100644 --- a/src/filter/ctojr.h +++ b/src/filter/ctojr.h @@ -33,7 +33,29 @@ **************************************************************************/ #ifdef U //{ -static int const N_MRU = 1024; // does not have to be a power of 2 +#define NOFILT 0 // no filter +#define FNOMRU 1 // filter, but not using mru +#define MRUFLT 2 // mru filter + +static unsigned +f80_call(Filter const *f) +{ + return (1+ (0x0f & f->id)) % 3; +} + +static unsigned +f80_jmp1(Filter const *f) +{ + return ((1+ (0x0f & f->id)) / 3) % 3; +} + +static unsigned +f80_jcc2(Filter const *f) +{ + return f80_jmp1(f); +} + +static int const N_MRU = 32; // does not have to be a power of 2 // Adaptively remember recent destinations. static void @@ -70,7 +92,6 @@ static int F(Filter *f) #ifdef U // filter upx_byte *const b = f->buf; - const unsigned addvalue = f->addvalue; #else // scan const upx_byte *b = f->buf; @@ -80,22 +101,30 @@ static int F(Filter *f) unsigned ic, jc, kc; unsigned calls = 0, noncalls = 0, noncalls2 = 0; unsigned lastnoncall = size, lastcall = 0; + unsigned wtally[3]; memset(wtally, 0, sizeof(wtally)); #ifdef U //{ + unsigned const f_call = f80_call(f); + unsigned const f_jmp1 = f80_jmp1(f); + unsigned const f_jcc2 = f80_jcc2(f); + int hand = 0, tail = 0; unsigned mru[N_MRU]; memset(&mru[0], 0, sizeof(mru)); - f->n_mru = N_MRU; + assert(N_MRU<=256); + f->n_mru = (MRUFLT==f_call || MRUFLT==f_jmp1 || MRUFLT==f_jcc2) ? + N_MRU : 0; #endif //} // FIXME: We must fit into 8MB because we steal one bit. // find a 16MB large empty address space { + int which; unsigned char buf[256]; memset(buf,0,256); for (ic = 0; ic < size - 5; ic++) - if (CONDF(b,ic,lastcall) && get_le32(b+ic+1)+ic+1 >= size) + if (CONDF(which,b,ic,lastcall) && get_le32(b+ic+1)+ic+1 >= size) { buf[b[ic+1]] |= 1; } @@ -110,14 +139,17 @@ static int F(Filter *f) for (ic = 0; ic < size - 5; ic++) { - if (!CONDF(b,ic,lastcall)) + int which; + int f_on = 0; + if (!CONDF(which,b,ic,lastcall)) continue; + ++wtally[which]; jc = get_le32(b+ic+1)+ic+1; // try to detect 'real' calls only if (jc < size) { #ifdef U - if (COND2(b,lastcall,ic,ic-1,ic)) { // 6-byte Jcc + if (2==which && NOFILT!=f_jcc2) { // 6-byte Jcc // Prefix 0x0f is constant, but opcode condition 0x80..0x8f // varies. Because we store the destination (or its mru index) // in be32 big endian format, the low-addressed bytes @@ -130,9 +162,13 @@ static int F(Filter *f) } // FIXME [?]: Extend to 8 bytes if "ADD ESP, byte 4*n" follows CALL. // This will require two related cto's (consecutive, or otherwise). - { + if ((0==which && MRUFLT==f_call) + || (1==which && MRUFLT==f_jmp1) + || (2==which && MRUFLT==f_jcc2) ) { + f_on = 1; // Recode the destination: narrower mru indices // should compress better than wider addresses. + // (But not when offset of match is unlimited?) int k; for (k = 0; k < N_MRU; ++k) { int kh = hand + k; @@ -153,38 +189,45 @@ static int F(Filter *f) } mru[hand] = jc; } + } else + if ((0==which && NOFILT!=f_call) + || (1==which && NOFILT!=f_jmp1) + || (2==which && NOFILT!=f_jcc2) ) { + f_on = 1; + set_be32(b+ic+1, jc+cto); } - jc += addvalue; #endif - if (ic - lastnoncall < 5) - { - // check the last 4 bytes before this call - for (kc = 4; kc; kc--) - if (CONDF(b,ic-kc,lastcall) && b[ic-kc+1] == cto8) - break; - if (kc) - { -#ifdef U - // restore original - if (COND2(b,lastcall,ic,ic,ic-1)) { - // Unswap prefix and opcode for 6-byte Jcc - unsigned char const t = - b[ic-1]; - b[ic-1] = b[ic]; - b[ic] = t; - } - set_le32(b+ic+1,jc-ic-1); -#endif - if (b[ic+1] == cto8) - return 1; // fail - buffer not restored - lastnoncall = ic; - noncalls2++; - continue; - } + if (f_on) { + if (ic - lastnoncall < 5) + { + // check the last 4 bytes before this call + for (kc = 4; kc; kc--) + if (CONDF(which,b,ic-kc,lastcall) && b[ic-kc+1] == cto8) + break; + if (kc) + { + #ifdef U + // restore original + if (2==which) { + // Unswap prefix and opcode for 6-byte Jcc + unsigned char const t = + b[ic-1]; + b[ic-1] = b[ic]; + b[ic] = t; + } + set_le32(b+ic+1,jc-ic-1); + #endif + if (b[ic+1] == cto8) + return 1; // fail - buffer not restored + lastnoncall = ic; + noncalls2++; + continue; + } + } + calls++; + ic += 4; + lastcall = ic+1; } - calls++; - ic += 4; - lastcall = ic+1; } else { @@ -198,9 +241,11 @@ static int F(Filter *f) f->noncalls = noncalls; f->lastcall = lastcall; -#ifdef TESTING - printf("\ncalls=%d noncalls=%d noncalls2=%d text_size=%x calltrickoffset=%x\n",calls,noncalls,noncalls2,size,cto); -#endif +/*#ifdef TESTING*/ + printf("\ncalls=%d noncalls=%d noncalls2=%d text_size=%x calltrickoffset=%x\n", + calls,noncalls,noncalls2,size,cto8); + printf("CALL/JMP/JCC %d %d %d\n",wtally[0],wtally[1],wtally[2]); +/*#endif*/ return 0; } @@ -219,43 +264,62 @@ static int U(Filter *f) const unsigned cto = (unsigned)f->cto << 24; unsigned lastcall = 0; int hand = 0, tail = 0; + unsigned const f_call = f80_call(f); + unsigned const f_jmp1 = f80_jmp1(f); + unsigned const f_jcc2 = f80_jcc2(f); + unsigned mru[N_MRU]; memset(&mru[0], 0, sizeof(mru)); for (ic = 0; ic < size5; ic++) { - if (CONDU(b,ic,lastcall)) + int which; + if (CONDU(which,b,ic,lastcall)) { + unsigned f_on = 0; jc = get_be32(b+ic+1) - cto; if (b[ic+1] == f->cto) { - if (1&jc) { // 1st time at this destination - jc >>= 1; - if (0 > --hand) { - hand = N_MRU -1; - } - mru[hand] = jc; + if ((0==which && MRUFLT==f_call) + || (1==which && MRUFLT==f_jmp1) + || (2==which && MRUFLT==f_jcc2) ) { + f_on = 1; + if (1&jc) { // 1st time at this destination + jc >>= 1; + if (0 > --hand) { + hand = N_MRU -1; + } + mru[hand] = jc; + } + else { // not 1st time at this destination + jc >>= 1; + int kh = jc + hand; + if (N_MRU <= kh) { + kh -= N_MRU; + } + jc = mru[kh]; + update_mru(jc, kh, mru, hand, tail); + } + set_le32(b+ic+1,jc-ic-1); + } else + if ((0==which && NOFILT!=f_call) + || (1==which && NOFILT!=f_jmp1) + || (2==which && NOFILT!=f_jcc2) ) { + f_on = 1; + set_le32(b+ic+1,jc-ic-1); } - else { // not 1st time at this destination - jc >>= 1; - int kh = jc + hand; - if (N_MRU <= kh) { - kh -= N_MRU; - } - jc = mru[kh]; - update_mru(jc, kh, mru, hand, tail); - } - set_le32(b+ic+1,jc-ic-1); - - if (COND2(b,lastcall,ic,ic,ic-1)) { + if (2==which && NOFILT!=f_jcc2) { // Unswap prefix and opcode for 6-byte Jcc unsigned char const t = b[ic-1]; b[ic-1] = b[ic]; - b[ic] = t; + b[ic] = t; + } + + if (f_on) { + f->calls++; + ic += 4; + f->lastcall = lastcall = ic+1; } - f->calls++; - ic += 4; - f->lastcall = lastcall = ic+1; } else f->noncalls++; diff --git a/src/filter/ctok.h b/src/filter/ctok.h new file mode 100644 index 00000000..b82403ba --- /dev/null +++ b/src/filter/ctok.h @@ -0,0 +1,166 @@ +/* ctok.h -- filter CTO implementation + + This file is part of the UPX executable compressor. + + Copyright (C) 1996-2001 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1996-2001 Laszlo Molnar + Copyright (C) 2000-2001 John F. Reiser + All Rights Reserved. + + UPX and the UCL library are free software; you can redistribute them + and/or modify them under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Markus F.X.J. Oberhumer Laszlo Molnar John F. Reiser + markus@oberhumer.com ml1050@cdata.tvnet.hu jreiser@BitWagon.com + */ + + + +/************************************************************************* +// filter / scan +**************************************************************************/ + +static int F(Filter *f) +{ +#ifdef U + // filter + upx_byte *b = f->buf; + const unsigned addvalue = f->addvalue; +#else + // scan + const upx_byte *b = f->buf; +#endif + const unsigned size = f->buf_len; + unsigned const id = f->id; + + unsigned ic, jc, kc; + unsigned calls = 0, noncalls = 0, noncalls2 = 0; + unsigned lastnoncall = size, lastcall = 0; + + // find a 16MB large empty address space + { + unsigned char buf[256]; + memset(buf,0,256); + + for (ic = 0; ic < size - 5; ic++) + if (COND(b,ic,lastcall,id) && get_le32(b+ic+1)+ic+1 >= size) + { + buf[b[ic+1]] |= 1; + } + + if (getcto(f, buf) < 0) + return -1; + } + const unsigned char cto8 = f->cto; +#ifdef U + const unsigned cto = (unsigned)f->cto << 24; +#endif + + for (ic = 0; ic < size - 5; ic++) + { + if (!COND(b,ic,lastcall,id)) + continue; + jc = get_le32(b+ic+1)+ic+1; + // try to detect 'real' calls only + if (jc < size) + { +#ifdef U + set_be32(b+ic+1,jc+addvalue+cto); +#endif + if (ic - lastnoncall < 5) + { + // check the last 4 bytes before this call + for (kc = 4; kc; kc--) + if (COND(b,ic-kc,lastcall,id) && b[ic-kc+1] == cto8) + break; + if (kc) + { +#ifdef U + // restore original + set_le32(b+ic+1,jc-ic-1); +#endif + if (b[ic+1] == cto8) + return 1; // fail - buffer not restored + lastnoncall = ic; + noncalls2++; + continue; + } + } + calls++; + ic += 4; + lastcall = ic+1; + } + else + { + assert(b[ic+1] != cto8); // this should not happen + lastnoncall = ic; + noncalls++; + } + } + + f->calls = calls; + f->noncalls = noncalls; + f->lastcall = lastcall; + +#ifdef TESTING + printf("\ncalls=%d noncalls=%d noncalls2=%d text_size=%x calltrickoffset=%x\n",calls,noncalls,noncalls2,size,cto); +#endif + return 0; +} + + +/************************************************************************* +// unfilter +**************************************************************************/ + +#ifdef U +static int U(Filter *f) +{ + upx_byte *b = f->buf; + const unsigned size5 = f->buf_len - 5; + const unsigned addvalue = f->addvalue; + const unsigned cto = (unsigned)f->cto << 24; + unsigned const id = f->id; + unsigned lastcall = 0; + + unsigned ic, jc; + + for (ic = 0; ic < size5; ic++) + if (COND(b,ic,lastcall,id)) + { + jc = get_be32(b+ic+1); + if (b[ic+1] == f->cto) + { + set_le32(b+ic+1,jc-ic-1-addvalue-cto); + f->calls++; + ic += 4; + f->lastcall = lastcall = ic+1; + } + else + f->noncalls++; + } + return 0; +} +#endif + + +#undef F +#undef U + + +/* +vi:ts=4:et:nowrap +*/ + diff --git a/src/filteri.cpp b/src/filteri.cpp index 6f6eb169..7b20cce5 100644 --- a/src/filteri.cpp +++ b/src/filteri.cpp @@ -88,9 +88,7 @@ // cto calltrick with jmp **************************************************************************/ -#define COND(b,x,lastcall) \ - (b[x] == 0xe8 || b[x] == 0xe9 \ - || (lastcall!=(x) && 0xf==b[(x)-1] && 0x80<=b[x] && b[x]<=0x8f) ) +#define COND(b,x,lastcall) (b[x] == 0xe8 || b[x] == 0xe9) #define F f_ctoj32_e8e9_bswap_le #define U u_ctoj32_e8e9_bswap_le #include "filter/ctoj.h" @@ -100,15 +98,37 @@ /************************************************************************* -// cto calltrick with jmp and relative renumbering +// cto calltrick with jmp, optional jcc **************************************************************************/ -#define COND1(b,x) (b[x] == 0xe8 || b[x] == 0xe9) -#define COND2(b,lastcall,x,y,z) \ - (lastcall!=(x) && 0xf==b[y] && 0x80<=b[z] && b[z]<=0x8f) +#define COND1(b,x) (b[x] == 0xe8 || b[x] == 0xe9) +#define COND2(b,x,lc) (lc!=(x) && 0xf==b[(x)-1] && 0x80<=b[x] && b[x]<=0x8f) +#define COND(b,x,lc,id) (COND1(b,x) || ((9<=(0xf&(id))) && COND2(b,x,lc))) +#define F f_ctok32_e8e9_bswap_le +#define U u_ctok32_e8e9_bswap_le +#include "filter/ctok.h" +#define F s_ctok32_e8e9_bswap_le +#include "filter/ctok.h" +#undef COND +#undef COND2 +#undef COND1 -#define CONDF(b,x,lastcall) (COND1(b,x) || COND2(b,lastcall,x,(x)-1, x )) -#define CONDU(b,x,lastcall) (COND1(b,x) || COND2(b,lastcall,x, x ,(x)-1)) + +/************************************************************************* +// cto calltrick with jmp and jcc and relative renumbering +**************************************************************************/ + +#define COND_CALL(which,b,x) ((which = 0), b[x] == 0xe8) +#define COND_JMP( which,b,x) ((which = 1), b[x] == 0xe9) +#define COND_JCC( which,b,lastcall,x,y,z) ((which = 2), \ + (lastcall!=(x) && 0xf==b[y] && 0x80<=b[z] && b[z]<=0x8f)) +#define COND1(which,b,x) (COND_CALL(which,b,x) || COND_JMP(which,b,x)) +#define COND2(which,b,lastcall,x,y,z) COND_JCC(which,b,lastcall,x,y,z) + +#define CONDF(which,b,x,lastcall) \ + (COND1(which,b,x) || COND2(which,b,lastcall,x,(x)-1, x )) +#define CONDU(which,b,x,lastcall) \ + (COND1(which,b,x) || COND2(which,b,lastcall,x, x ,(x)-1)) #define F f_ctojr32_e8e9_bswap_le #define U u_ctojr32_e8e9_bswap_le @@ -120,6 +140,9 @@ #undef CONDF #undef COND2 #undef COND1 +#undef COND_JCC +#undef COND_JMP +#undef COND_CALL /************************************************************************* @@ -178,8 +201,19 @@ const FilterImp::FilterEntry FilterImp::filters[] = { // 32-bit cto calltrick with jmp { 0x36, 6, 0x00ffffff, f_ctoj32_e8e9_bswap_le, u_ctoj32_e8e9_bswap_le, s_ctoj32_e8e9_bswap_le }, - // 32-bit cto calltrick with jmp and relative renumbering + // 32-bit calltrick with jmp, optional jcc; runtime can unfilter more than one block + { 0x46, 6, 0x00ffffff, f_ctok32_e8e9_bswap_le, u_ctok32_e8e9_bswap_le, s_ctok32_e8e9_bswap_le }, + { 0x49, 6, 0x00ffffff, f_ctok32_e8e9_bswap_le, u_ctok32_e8e9_bswap_le, s_ctok32_e8e9_bswap_le }, + + // 32-bit cto calltrick with jmp and jcc(swap 0x0f/0x8Y) and relative renumbering { 0x80, 8, 0x00ffffff, f_ctojr32_e8e9_bswap_le, u_ctojr32_e8e9_bswap_le, s_ctojr32_e8e9_bswap_le }, + { 0x81, 8, 0x00ffffff, f_ctojr32_e8e9_bswap_le, u_ctojr32_e8e9_bswap_le, s_ctojr32_e8e9_bswap_le }, + { 0x82, 8, 0x00ffffff, f_ctojr32_e8e9_bswap_le, u_ctojr32_e8e9_bswap_le, s_ctojr32_e8e9_bswap_le }, + { 0x83, 8, 0x00ffffff, f_ctojr32_e8e9_bswap_le, u_ctojr32_e8e9_bswap_le, s_ctojr32_e8e9_bswap_le }, + { 0x84, 8, 0x00ffffff, f_ctojr32_e8e9_bswap_le, u_ctojr32_e8e9_bswap_le, s_ctojr32_e8e9_bswap_le }, + { 0x85, 8, 0x00ffffff, f_ctojr32_e8e9_bswap_le, u_ctojr32_e8e9_bswap_le, s_ctojr32_e8e9_bswap_le }, + { 0x86, 8, 0x00ffffff, f_ctojr32_e8e9_bswap_le, u_ctojr32_e8e9_bswap_le, s_ctojr32_e8e9_bswap_le }, + { 0x87, 8, 0x00ffffff, f_ctojr32_e8e9_bswap_le, u_ctojr32_e8e9_bswap_le, s_ctojr32_e8e9_bswap_le }, // simple delta filter { 0x90, 2, 0, f_sub8_1, u_sub8_1, s_sub8_1 }, diff --git a/src/linker.cpp b/src/linker.cpp index 0f93e3ce..73c96d47 100644 --- a/src/linker.cpp +++ b/src/linker.cpp @@ -46,7 +46,6 @@ struct Linker::jump int toffs; }; - Linker::Linker(const void *pdata, int plen, int pinfo) { iloader = new char[(ilen = plen) + 4096]; @@ -101,7 +100,7 @@ Linker::~Linker() } -void Linker::addSection(const char *sect) +int Linker::addSection(const char *sect) { int ic; while (*sect) @@ -129,11 +128,12 @@ void Linker::addSection(const char *sect) olen += sections[ic].len; break; } - //printf("%8.8s",section); + //printf("%8.8s",sect); assert(ic!=nsections); } sect += 8; } + return olen; } diff --git a/src/linker.h b/src/linker.h index 19445237..02d24d8e 100644 --- a/src/linker.h +++ b/src/linker.h @@ -35,10 +35,11 @@ class Linker public: Linker(const void *pdata, int plen, int pinfo); virtual ~Linker(); - void addSection(const char *sect); + int addSection(const char *sect); void addSection(const char *sname, const void *sdata, unsigned len); const char *getLoader(int *llen); int getSection(const char *name, int *slen) const; + int getLoaderSize() const { return olen; } protected: // little endian diff --git a/src/mem.cpp b/src/mem.cpp index 863e7f8c..05968c71 100644 --- a/src/mem.cpp +++ b/src/mem.cpp @@ -47,6 +47,14 @@ MemBuffer::~MemBuffer() this->free(); } +MemBufferIO::MemBufferIO(unsigned size) : + MemBuffer(size) +{ +} + +MemBufferIO::~MemBufferIO() +{ +} void MemBuffer::free() { @@ -67,6 +75,39 @@ unsigned MemBuffer::getSize() const } +unsigned MemBufferIO::seek(unsigned offset, int whence) +{ + switch (whence) { + default: { + assert(false); + } break; + case SEEK_SET: { + assert(offset<=alloc_size); + ptr = offset + alloc_ptr; + } break; + case SEEK_CUR: { + assert((offset + (ptr - alloc_ptr))<=alloc_size); + ptr += offset; + } break; + case SEEK_END: { + assert((offset + alloc_size)<=alloc_size); + ptr = offset + alloc_size + alloc_ptr; + } break; + } + return ptr - alloc_ptr; +} + +unsigned MemBufferIO::write(void const *data, unsigned size) +{ + unsigned const avail = getSize(); + unsigned const len = UPX_MIN(size, avail); + if (data!=ptr) { + memmove(ptr, data, len); + } + ptr += len; + return len; +} + void MemBuffer::alloc(unsigned size, unsigned base_offset) { #if 0 diff --git a/src/mem.h b/src/mem.h index 195e78e4..53aad038 100644 --- a/src/mem.h +++ b/src/mem.h @@ -55,6 +55,7 @@ public: private: void alloc(unsigned size, unsigned base_offset); +protected: unsigned char *ptr; unsigned char *alloc_ptr; unsigned alloc_size; @@ -71,6 +72,14 @@ private: //static void operator delete[] (void *) {} }; +class MemBufferIO : public MemBuffer { +public: + MemBufferIO(unsigned size=0); + ~MemBufferIO(); + + unsigned seek(unsigned offset, int whence); // returns new position + unsigned write(void const *data, unsigned size); // returns xfer count +}; #endif /* already included */ diff --git a/src/p_lx_elf.cpp b/src/p_lx_elf.cpp index 19be9d3d..6424ba92 100644 --- a/src/p_lx_elf.cpp +++ b/src/p_lx_elf.cpp @@ -31,6 +31,7 @@ #include "file.h" #include "filter.h" +#include "linker.h" #include "packer.h" #include "p_elf.h" #include "p_unix.h" @@ -45,9 +46,9 @@ **************************************************************************/ static const -#include "stub/l_le_n2b.h" +#include "stub/l_lx_elf86.h" static const -#include "stub/l_le_n2d.h" +#include "stub/fold_elf86.h" PackLinuxI386elf::PackLinuxI386elf(InputFile *f) : @@ -64,7 +65,12 @@ int const * PackLinuxI386elf::getFilters() const { static const int filters[] = { - 0x80, 0x36, 0x26, 0x24, 0x16, 0x13, 0x14, 0x11, 0x25, 0x15, 0x12, + 0x49, 0x46, + 0x83, 0x36, 0x26, + 0x86, 0x80, + 0x84, 0x87, 0x81, + 0x82, 0x85, + 0x24, 0x16, 0x13, 0x14, 0x11, 0x25, 0x15, 0x12, -1 }; return filters; } @@ -72,103 +78,34 @@ PackLinuxI386elf::getFilters() const int PackLinuxI386elf::buildLoader(const Filter *ft) { - if (ft) { - n_mru = ft->n_mru; - } - else { - n_mru = 0; - } - return super::buildLoader(ft); -} - -const upx_byte *PackLinuxI386elf::getLoader() const -{ - if (M_IS_NRV2B(ph.method)) - return linux_i386elf_nrv2b_loader; - if (M_IS_NRV2D(ph.method)) - return linux_i386elf_nrv2d_loader; - return NULL; -} - -int PackLinuxI386elf::getLoaderSize() const -{ - if (0 != lsize) { - return lsize; - } - if (M_IS_NRV2B(ph.method)) - return sizeof(linux_i386elf_nrv2b_loader); - if (M_IS_NRV2D(ph.method)) - return sizeof(linux_i386elf_nrv2d_loader); - return 0; + return buildLinuxLoader( + linux_i386elf_loader, sizeof(linux_i386elf_loader), + linux_i386elf_fold, sizeof(linux_i386elf_fold), + ft, getbrk(phdri, ehdri.e_phnum) ); } void PackLinuxI386elf::updateLoader(OutputFile *fo) { #define PAGE_MASK (~0<<12) - Elf_LE32_Phdr *const phdro = (Elf_LE32_Phdr *)(sizeof(Elf_LE32_Ehdr)+loader); + upx_byte *const ptr = const_cast(getLoader()); + Elf_LE32_Phdr *const phdro = (Elf_LE32_Phdr *)(sizeof(Elf_LE32_Ehdr) + ptr); off_t const totlen = fo->getBytesWritten(); phdro->p_filesz = totlen; // pre-calculate for benefit of runtime disappearing act via munmap() phdro->p_memsz = PAGE_MASK & (~PAGE_MASK + totlen); - int d1=0x80, d2=0x80; - if (this->n_mru) { - int const bytes = ('?'<<8) | 0x0f; - d1 += 4+ patch_le32(d1+loader, 400, "NMRU", this->n_mru); - d2 += 4+ patch_le32(d2+loader, 400, &bytes, (ph.filter_cto<<8)|0x0f); - } patchLoaderChecksum(); - fo->seek(0, SEEK_SET); - fo->rewrite(loader, UPX_MAX3(0x80, d1, d2)); #undef PAGE_MASK } - void PackLinuxI386elf::patchLoader() { + unsigned char *const ptr = const_cast(getLoader()); lsize = getLoaderSize(); - Elf_LE32_Ehdr *const ehdr = (Elf_LE32_Ehdr *)(void *)loader; - Elf_LE32_Phdr *const phdr = (Elf_LE32_Phdr *)(1+ehdr); - - // stub/scripts/setfold.pl puts address of 'fold_begin' in phdr[1].p_offset - off_t const fold_begin = phdr[1].p_offset; - assert(fold_begin > 0); - assert(fold_begin < (off_t)lsize); - MemBuffer cprLoader(lsize); - - // compress compiled C-code portion of loader - upx_uint const uncLsize = lsize - fold_begin; - upx_uint cprLsize; - int r = upx_compress(loader + fold_begin, uncLsize, cprLoader, &cprLsize, - NULL, ph.method, 10, NULL, NULL); - if (r != UPX_E_OK || cprLsize >= uncLsize) - throwInternalError("loaded compression failed"); - - set_le32(0+fold_begin+loader, uncLsize); - set_le32(4+fold_begin+loader, cprLsize); - memcpy( 8+fold_begin+loader, cprLoader, cprLsize); - lsize = 8 + fold_begin + cprLsize; - patchVersion(loader,lsize); - - // Info for OS kernel to set the brk() - unsigned const brka = getbrk(phdri, ehdri.e_phnum); - phdr[1].p_offset = 0xfff&brka; - phdr[1].p_vaddr = brka; - phdr[1].p_paddr = brka; - phdr[1].p_filesz = 0; - phdr[1].p_memsz = 0; - - // The beginning of our loader consists of a elf_hdr (52 bytes) and - // two sections elf_phdr (2 * 32 byte), so we have 12 free bytes - // from offset 116 to the program start at offset 128. - assert(ehdr->e_phoff == sizeof(Elf_LE32_Ehdr)); - assert(ehdr->e_ehsize == sizeof(Elf_LE32_Ehdr)); - assert(ehdr->e_phentsize == sizeof(Elf_LE32_Phdr)); - assert(ehdr->e_phnum == 2); - assert(ehdr->e_shnum == 0); assert(lsize > 128 && lsize < 4096); + patchVersion(ptr, lsize); patchLoaderChecksum(); } @@ -228,10 +165,13 @@ bool PackLinuxI386elf::canPack() return super::canPack(); } +struct cprBlkHdr { + unsigned sz_unc; // uncompressed size (0 means EOF) + unsigned sz_cpr; // (compressed_size<<8) | cto8 +}; void PackLinuxI386elf::packExtent( Extent const &x, - OutputFile *fo, unsigned &total_in, unsigned &total_out, Filter *ft @@ -249,11 +189,13 @@ void PackLinuxI386elf::packExtent( // file is e.g. blocksize + 1 bytes long // compress + unsigned char *const hdrptr = obuf; + obuf.seek(sizeof(cprBlkHdr), SEEK_CUR); ph.u_len = l; ph.overlap_overhead = 0; if (ft) { ft->buf_len = l; - compressWithFilters(ft, OVERHEAD); + compressWithFilters(ft, OVERHEAD, ((opt->filter > 0) ? -2 : 2)); } else { (void) compress(ibuf, obuf); // ignore return value @@ -273,19 +215,18 @@ void PackLinuxI386elf::packExtent( } // write block sizes - unsigned char size[8]; - set_native32(size+0, ph.u_len); - set_native32(size+4, ph.c_len); - fo->write(size, 8); + set_native32(0+hdrptr, ph.u_len); + set_native32(4+hdrptr, (ph.c_len<<8) | ph.filter_cto); // write compressed data if (ph.c_len < ph.u_len) { - fo->write(obuf, ph.c_len); - verifyOverlappingDecompression(); + obuf.write(obuf, ph.c_len); + // FIXME: obuf is not discardable! + // verifyOverlappingDecompression(); } else - fo->write(ibuf, ph.u_len); + obuf.write(ibuf, ph.u_len); total_in += ph.u_len; total_out += ph.c_len; @@ -300,30 +241,24 @@ void PackLinuxI386elf::pack(OutputFile *fo) progid = 0; // not used fi->readx(&ehdri, sizeof(ehdri)); + assert(ehdri.e_phoff == sizeof(Elf_LE32_Ehdr)); // checked by canPack() off_t const sz_phdrs = ehdri.e_phnum * ehdri.e_phentsize; phdri = new Elf_LE32_Phdr[ehdri.e_phnum]; fi->seek(ehdri.e_phoff, SEEK_SET); fi->readx(phdri, sz_phdrs); - // prepare loader - lsize = getLoaderSize(); - loader.alloc(lsize + sizeof(p_info)); - memcpy(loader,getLoader(),lsize); - - // patch loader, prepare header info, write loader + header info - patchLoader(); // can change lsize by packing upx_main - p_info *const hbuf = (p_info *)(loader + lsize); - set_native32(&hbuf->p_progid, progid); - set_native32(&hbuf->p_filesize, file_size); - set_native32(&hbuf->p_blocksize, blocksize); - fo->write(loader, lsize + sizeof(p_info)); - // init compression buffers ibuf.alloc(blocksize); obuf.allocForCompression(blocksize); + { + p_info hbuf; + set_native32(&hbuf.p_progid, progid); + set_native32(&hbuf.p_filesize, file_size); + set_native32(&hbuf.p_blocksize, blocksize); + obuf.write(&hbuf, sizeof(hbuf)); + } - assert(ehdri.e_phoff == sizeof(Elf_LE32_Ehdr)); // checked by canPack() Extent x; unsigned k; @@ -354,52 +289,69 @@ void PackLinuxI386elf::pack(OutputFile *fo) unsigned total_in = 0; unsigned total_out = 0; + ui_pass = -1; x.offset = 0; x.size = sizeof(Elf_LE32_Ehdr) + sz_phdrs; - ui_pass = -1; - packExtent(x, fo, total_in, total_out, 0); - ui_pass = 0; + { + int const old_level = ph.level; ph.level = 10; + packExtent(x, total_in, total_out, 0); + ph.level = old_level; + } + ui_pass = 0; Filter ft(ph.level); + ft.addvalue = 0; nx = 0; for (k = 0; k < ehdri.e_phnum; ++k) if (PT_LOAD==phdri[k].p_type) { - ft.addvalue = (unsigned)phdri[k].p_paddr; + if (ft.id < 0x40) { + // FIXME: ?? ft.addvalue = phdri[k].p_vaddr; + } x.offset = phdri[k].p_offset; x.size = phdri[k].p_filesz; if (0 == nx) { // 1st PT_LOAD must cover Ehdr at 0==p_offset unsigned const delta = sizeof(Elf_LE32_Ehdr) + sz_phdrs; - ft.addvalue += delta; + if (ft.id < 0x40) { + // FIXME: ?? ft.addvalue += delta; + } x.offset += delta; x.size -= delta; } - packExtent(x, fo, total_in, total_out, - ((Elf_LE32_Phdr::PF_X & phdri[k].p_flags) ? &ft : 0) ); - // FIXME: All PF_X Phdrs must use same ft.cto - // (There is usually only one such Phdr, so we're mostly lucky.) + packExtent(x, total_in, total_out, + ((Elf_LE32_Phdr::PF_X & phdri[k].p_flags) + ? &ft : 0 ) ); ++nx; } if (ptload0hi < ptload1lo) { // alignment hole? x.offset = ptload0hi; x.size = ptload1lo - ptload0hi; - packExtent(x, fo, total_in, total_out, 0); + packExtent(x, total_in, total_out, 0); } if ((off_t)total_in < file_size) { // non-PT_LOAD stuff x.offset = total_in; x.size = file_size - total_in; - packExtent(x, fo, total_in, total_out, 0); + packExtent(x, total_in, total_out, 0); } if ((off_t)total_in != file_size) throwEOFException(); // write block end marker (uncompressed size 0) - fo->write("\x00\x00\x00\x00", 4); + set_native32(obuf, 0); + obuf.write(obuf, 4); // update header with totals ph.u_len = total_in; ph.c_len = total_out; + upx_byte const *p = getLoader(); + lsize = getLoaderSize(); + patchFilter32(const_cast(p), lsize, &ft); + fo->write(p, lsize); + + unsigned pos = obuf.seek(0, SEEK_CUR); + fo->write(obuf - pos, pos); + // write packheader writePackHeader(fo); @@ -408,6 +360,8 @@ void PackLinuxI386elf::pack(OutputFile *fo) fo->write(obuf, 4); updateLoader(fo); + fo->seek(0, SEEK_SET); + fo->rewrite(p, lsize); // finally check the compression ratio if (!checkFinalCompressionRatio(fo)) @@ -417,12 +371,16 @@ void PackLinuxI386elf::pack(OutputFile *fo) void PackLinuxI386elf::unpackExtent(unsigned wanted, OutputFile *fo, unsigned &total_in, unsigned &total_out, - unsigned &c_adler, unsigned &u_adler) + unsigned &c_adler, unsigned &u_adler, + bool first_PF_X +) { while (wanted) { fi->readx(ibuf, 8); int const sz_unc = ph.u_len = get_native32(ibuf+0); - int const sz_cpr = ph.c_len = get_native32(ibuf+4); + unsigned const tmp = get_native32(ibuf+4); + int const sz_cpr = ph.c_len = tmp>>8; + ph.filter_cto = tmp<<24; if (sz_unc == 0) { // must never happen while 0!=wanted throwCompressedDataViolation(); @@ -441,6 +399,15 @@ void PackLinuxI386elf::unpackExtent(unsigned wanted, OutputFile *fo, if (sz_cpr < sz_unc) { decompress(ibuf+j, ibuf, false); + if (first_PF_X) { // Elf32_Ehdr is never filtered + first_PF_X = false; // but everything else might be + } + else if (ph.filter) { + Filter ft(ph.level); + ft.init(ph.filter, 0); + ft.cto = ph.filter_cto; + ft.unfilter(ibuf, sz_unc); + } j = 0; } // update checksum of uncompressed data @@ -473,7 +440,9 @@ void PackLinuxI386elf::unpack(OutputFile *fo) ibuf.alloc(blocksize + OVERHEAD); fi->readx(ibuf, 2*4); ph.u_len = get_native32(0+ibuf); - ph.c_len = get_native32(4+ibuf); + unsigned const tmp = get_native32(4+ibuf); + ph.c_len = tmp>>8; + ph.filter_cto = tmp<<24; // Uncompress Ehdr and Phdrs. fi->readx(ibuf, ph.c_len); @@ -486,6 +455,7 @@ void PackLinuxI386elf::unpack(OutputFile *fo) off_t ptload0hi=0, ptload1lo=0; // decompress PT_LOAD + bool first_PF_X = true; fi->seek(- (off_t) (2*4 + ph.c_len), SEEK_CUR); for (unsigned j=0; j < ehdr->e_phnum; ++phdr, ++j) { if (PT_LOAD==phdr->p_type) { @@ -497,19 +467,29 @@ void PackLinuxI386elf::unpack(OutputFile *fo) } if (fo) fo->seek(phdr->p_offset, SEEK_SET); - unpackExtent(phdr->p_filesz, fo, total_in, total_out, c_adler, u_adler); + if (Elf_LE32_Phdr::PF_X & phdr->p_flags) { + unpackExtent(phdr->p_filesz, fo, total_in, total_out, + c_adler, u_adler, first_PF_X); + first_PF_X = false; + } + else { + unpackExtent(phdr->p_filesz, fo, total_in, total_out, + c_adler, u_adler, false); + } } } if (ptload0hi < ptload1lo) { // alignment hole? if (fo) fo->seek(ptload0hi, SEEK_SET); - unpackExtent(ptload1lo - ptload0hi, fo, total_in, total_out, c_adler, u_adler); + unpackExtent(ptload1lo - ptload0hi, fo, total_in, total_out, + c_adler, u_adler, false); } if (total_out != orig_file_size) { // non-PT_LOAD stuff if (fo) fo->seek(0, SEEK_END); - unpackExtent(orig_file_size - total_out, fo, total_in, total_out, c_adler, u_adler); + unpackExtent(orig_file_size - total_out, fo, total_in, total_out, + c_adler, u_adler, false); } // check for end-of-file diff --git a/src/p_lx_elf.h b/src/p_lx_elf.h index 39669f19..0f8b63f2 100644 --- a/src/p_lx_elf.h +++ b/src/p_lx_elf.h @@ -59,15 +59,17 @@ protected: off_t offset; off_t size; }; - virtual void packExtent(Extent const &x, OutputFile *fo, + virtual void packExtent(Extent const &x, unsigned &total_in, unsigned &total_out, Filter *); virtual void unpackExtent(unsigned wanted, OutputFile *fo, unsigned &total_in, unsigned &total_out, - unsigned &c_adler, unsigned &u_adler); + unsigned &c_adler, unsigned &u_adler, bool first_PF_X); protected: +#if 0 //{ virtual const upx_byte *getLoader() const; virtual int getLoaderSize() const; +#endif //} virtual void patchLoader(); virtual void updateLoader(OutputFile *); @@ -75,7 +77,6 @@ protected: Elf_LE32_Ehdr ehdri; // from input file Elf_LE32_Phdr *phdri; // for input file - unsigned n_mru; }; diff --git a/src/p_lx_exc.cpp b/src/p_lx_exc.cpp index d55caca7..45e8e471 100644 --- a/src/p_lx_exc.cpp +++ b/src/p_lx_exc.cpp @@ -4,6 +4,7 @@ Copyright (C) 1996-2001 Markus Franz Xaver Johannes Oberhumer Copyright (C) 1996-2001 Laszlo Molnar + Copyright (C) 2001 John F. Reiser All Rights Reserved. UPX and the UCL library are free software; you can redistribute them @@ -21,14 +22,16 @@ If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - Markus F.X.J. Oberhumer Laszlo Molnar - markus@oberhumer.com ml1050@cdata.tvnet.hu + Markus F.X.J. Oberhumer Laszlo Molnar John F. Reiser + markus@oberhumer.com ml1050@cdata.tvnet.hu jreiser@BitWagon.com */ #include "conf.h" #include "file.h" +#include "filter.h" +#include "linker.h" #include "packer.h" #include "p_elf.h" #include "p_unix.h" @@ -42,9 +45,9 @@ **************************************************************************/ static const -#include "stub/l_lx_n2b.h" +#include "stub/l_lx_exec86.h" static const -#include "stub/l_lx_n2d.h" +#include "stub/fold_exec86.h" const int *PackLinuxI386::getCompressionMethods(int method, int level) const @@ -61,29 +64,196 @@ const int *PackLinuxI386::getCompressionMethods(int method, int level) const return m_nrv2d; } - -const upx_byte *PackLinuxI386::getLoader() const +int const * +PackLinuxI386::getFilters() const { - if (M_IS_NRV2B(ph.method)) - return linux_i386exec_nrv2b_loader; - if (M_IS_NRV2D(ph.method)) - return linux_i386exec_nrv2d_loader; - return NULL; + static const int filters[] = { + 0x49, 0x46, + 0x83, 0x86, 0x80, 0x84, 0x87, 0x81, 0x82, 0x85, + 0x24, 0x16, 0x13, 0x14, 0x11, 0x25, 0x15, 0x12, + -1 }; + return filters; } +struct cprElfhdr { + struct Elf_LE32_Ehdr ehdr; + struct Elf_LE32_Phdr phdr[2]; + struct PackUnix::l_info linfo; +}; -int PackLinuxI386::getLoaderSize() const +int +PackLinuxI386::buildLinuxLoader( + upx_byte const *const proto, + unsigned const szproto, + upx_byte const *const fold, + unsigned const szfold, + Filter const *ft, + unsigned const brka +) { - if (0!=lsize) { - return lsize; + initLoader(proto, szproto); + + struct cprElfhdr elfhdr = *(struct cprElfhdr const *)fold; + memset(&elfhdr.linfo, 0, sizeof(elfhdr.linfo)); + + // Info for OS kernel to set the brk() + if (brka) { + assert(ph.format==UPX_F_LINUX_ELF_i386 + || ph.format==UPX_F_LINUX_SH_i386 ); + if (ph.format==UPX_F_LINUX_SH_i386) { + assert(elfhdr.ehdr.e_phnum==1); + elfhdr.ehdr.e_phnum = 2; + } + elfhdr.phdr[1].p_offset = 0xfff&brka; + elfhdr.phdr[1].p_vaddr = brka; + elfhdr.phdr[1].p_paddr = brka; + elfhdr.phdr[1].p_filesz = 0; + elfhdr.phdr[1].p_memsz = 0; } - if (M_IS_NRV2B(ph.method)) - return sizeof(linux_i386exec_nrv2b_loader); - if (M_IS_NRV2D(ph.method)) - return sizeof(linux_i386exec_nrv2d_loader); - return 0; + + // The beginning of our loader consists of a elf_hdr (52 bytes) and + // two sections elf_phdr (2 * 32 byte), so we have 12 free bytes + // from offset 116 to the program start at offset 128. + assert(elfhdr.ehdr.e_phoff == sizeof(Elf_LE32_Ehdr)); + assert(elfhdr.ehdr.e_shoff == 0); + assert(elfhdr.ehdr.e_ehsize == sizeof(Elf_LE32_Ehdr)); + assert(elfhdr.ehdr.e_phentsize == sizeof(Elf_LE32_Phdr)); + assert(elfhdr.ehdr.e_phnum == (unsigned)(1+ (0!=brka))); + assert(elfhdr.ehdr.e_shnum == 0); + linker->addSection("ELFHEADX", (unsigned char const *)&elfhdr, sizeof(elfhdr)); + addLoader("ELFHEADX", 0); + + struct { + upx_uint sz_unc; // uncompressed + upx_uint sz_cpr; // compressed + } h = { + szfold - sizeof(elfhdr), 0 + }; + unsigned char const *const uncLoader = (unsigned char const *)(1+ + (struct cprElfhdr const *)fold ); + unsigned char *const cprLoader = new unsigned char[sizeof(h) + h.sz_unc]; + + int r = upx_compress(uncLoader, h.sz_unc, sizeof(h) + cprLoader, &h.sz_cpr, + NULL, ph.method, 10, NULL, NULL ); + if (r != UPX_E_OK || h.sz_cpr >= h.sz_unc) + throwInternalError("loader compression failed"); + memcpy(cprLoader, &h, sizeof(h)); + linker->addSection("FOLDEXEC", cprLoader, sizeof(h) + h.sz_cpr); + delete cprLoader; + + n_mru = ft->n_mru; + + addLoader("IDENTSTR", 0); + addLoader("LEXEC000", 0); + + if (ft->id) { + if (ph.format==UPX_F_LINUX_ELF_i386) { // decompr, unfilter are separate + addLoader("LXUNF000", 0); + addLoader("LXUNF002", 0); + if (0x80==(ft->id & 0xF0)) { + if (256==n_mru) { + addLoader("MRUBYTE0", 0); + } + else if (n_mru) { + addLoader("LXMRU005", 0); + } + if (n_mru) { + addLoader("LXMRU006", 0); + } + else { + addLoader("LXMRU007", 0); + } + } + else if (0x40==(ft->id & 0xF0)) { + addLoader("LXUNF008", 0); + } + addLoader("LXUNF010", 0); + } + if (n_mru) { + addLoader("LEXEC009", 0); + } + } + addLoader("LEXEC010", 0); + addLoader(getDecompressor(), 0); + addLoader("LEXEC015", 0); + if (ft->id) { + if (ph.format==UPX_F_LINUX_ELF_i386) { // decompr, unfilter are separate + if (0x80!=(ft->id & 0xF0)) { + addLoader("LXUNF042", 0); + } + } + else { // decompr, unfilter not separate + if (0x80==(ft->id & 0xF0)) { + addLoader("LEXEC110", 0); + if (n_mru) { + addLoader("LEXEC100", 0); + } + // bug in APP: jmp and label must be in same .asx/.asy + addLoader("LEXEC016", 0); + } + } + addFilter32(ft->id); + if (ph.format==UPX_F_LINUX_ELF_i386) { // decompr, unfilter are separate + if (0x80==(ft->id & 0xF0)) { + if (0==n_mru) { + addLoader("LXMRU058", 0); + } + } + addLoader("LXUNF035", 0); + } + else { // decompr always unfilters + addLoader("LEXEC017", 0); + } + } + + addLoader("LEXEC020", 0); + addLoader("FOLDEXEC", 0); + + struct Elf_LE32_Ehdr *const ldrEhdr = + (struct Elf_LE32_Ehdr *)const_cast(getLoader()); + unsigned e_entry = getLoaderSectionStart("LEXEC000"); + ldrEhdr->e_entry = e_entry + elfhdr.phdr[0].p_vaddr; + + char *ptr_cto = e_entry + (char *)ldrEhdr; + int sz_cto = getLoaderSize() - e_entry; + if (0x20==(ft->id & 0xF0) || 0x30==(ft->id & 0xF0)) { // push byte '?' ; cto8 + patch_le16(ptr_cto, sz_cto, "\x6a?", 0x6a + (ft->cto << 8)); + } + + // FIXME ?? patchVersion((char *)ldrEhdr, e_entry); + return getLoaderSize(); } +int +PackLinuxI386::buildLoader(Filter const *ft) +{ + unsigned const sz_fold = sizeof(linux_i386exec_fold); + MemBuffer buf(sz_fold); + memcpy(buf, linux_i386exec_fold, sz_fold); + + // patch loader + // note: we only can use /proc//fd when exetype > 0. + // also, we sleep much longer when compressing a script. + checkPatch(0,0,0,0); // reset + patch_le32(buf,sz_fold,"UPX4",exetype > 0 ? 3 : 15); // sleep time + patch_le32(buf,sz_fold,"UPX3",progid); + patch_le32(buf,sz_fold,"UPX2",exetype > 0 ? 0 : 0x7fffffff); + + // get fresh filter + Filter fold_ft = *ft; + fold_ft.init(ft->id, ft->addvalue); + int preferred_ctos[2] = {ft->cto, -1}; + fold_ft.preferred_ctos = preferred_ctos; + + // filter + optimizeFilter(&fold_ft, buf, sz_fold); + bool success = fold_ft.filter(buf + sizeof(cprElfhdr), sz_fold - sizeof(cprElfhdr)); + (void)success; + + return buildLinuxLoader( + linux_i386exec_loader, sizeof(linux_i386exec_loader), + buf, sz_fold, ft, 0 ); +} int PackLinuxI386::getLoaderPrefixSize() const { @@ -198,58 +368,13 @@ bool PackLinuxI386::canPack() void PackLinuxI386::patchLoader() { - lsize = getLoaderSize(); - - // patch loader - // note: we only can use /proc//fd when exetype > 0. - // also, we sleep much longer when compressing a script. - patch_le32(loader,lsize,"UPX4",exetype > 0 ? 3 : 15); // sleep time - patch_le32(loader,lsize,"UPX3",exetype > 0 ? 0 : 0x7fffffff); - patch_le32(loader,lsize,"UPX2",progid); - patchVersion(loader,lsize); - - Elf_LE32_Ehdr *const ehdr = (Elf_LE32_Ehdr *)(void *)loader; - Elf_LE32_Phdr *const phdr = (Elf_LE32_Phdr *)(1+ehdr); - - // stub/scripts/setfold.pl puts address of 'fold_begin' in phdr[1].p_offset - off_t const fold_begin = phdr[1].p_offset; - assert(fold_begin > 0); - assert(fold_begin < (off_t)lsize); - MemBuffer cprLoader(lsize); - - // compress compiled C-code portion of loader - upx_uint const uncLsize = lsize - fold_begin; - upx_uint cprLsize; - int r = upx_compress(loader + fold_begin, uncLsize, cprLoader, &cprLsize, - NULL, ph.method, 10, NULL, NULL); - if (r != UPX_E_OK || cprLsize >= uncLsize) - throwInternalError("loaded compression failed"); - - memcpy(fold_begin+loader, cprLoader, cprLsize); - lsize = fold_begin + cprLsize; - phdr->p_filesz = lsize; - // phdr->p_memsz is the decompressed size - - // The beginning of our loader consists of a elf_hdr (52 bytes) and - // one section elf_phdr (32 byte) now, - // another section elf_phdr (32 byte) later, so we have 12 free bytes - // from offset 116 to the program start at offset 128. - // These 12 bytes are used for l_info by ::patchLoaderChecksum(). - assert(get_le32(loader + 28) == 52); // e_phoff - assert(get_le32(loader + 32) == 0); // e_shoff - assert(get_le16(loader + 40) == 52); // e_ehsize - assert(get_le16(loader + 42) == 32); // e_phentsize - assert(get_le16(loader + 44) == 1); // e_phnum - assert(get_le16(loader + 48) == 0); // e_shnum - assert(lsize > 128 && lsize < 4096); - - patchLoaderChecksum(); } void PackLinuxI386::patchLoaderChecksum() { - l_info *const lp = (l_info *)(loader + getLoaderPrefixSize()); + unsigned char *const ptr = const_cast(getLoader()); + l_info *const lp = (l_info *)(ptr + getLoaderPrefixSize()); // checksum for loader + p_info lp->l_checksum = 0; lp->l_magic = UPX_ELF_MAGIC; @@ -258,7 +383,7 @@ void PackLinuxI386::patchLoaderChecksum() lp->l_format = (unsigned char) ph.format; // INFO: lp->l_checksum is currently unused unsigned adler = upx_adler32(0,NULL,0); - adler = upx_adler32(adler, loader, lsize + sizeof(p_info)); + adler = upx_adler32(adler, ptr, lsize + sizeof(p_info)); lp->l_checksum = adler; } @@ -266,13 +391,18 @@ void PackLinuxI386::patchLoaderChecksum() void PackLinuxI386::updateLoader(OutputFile *fo) { #define PAGE_MASK (~0<<12) - Elf_LE32_Ehdr *const ehdr = (Elf_LE32_Ehdr *)(unsigned char *)loader; + Elf_LE32_Ehdr *const ehdr = (Elf_LE32_Ehdr *)const_cast(getLoader()); + Elf_LE32_Phdr *const phdr = (Elf_LE32_Phdr *)(1+ehdr); + ehdr->e_phnum = 2; + phdr->p_filesz = lsize; + // phdr->p_memsz is the decompressed size + assert(lsize > 128 && lsize < 4096); // The first Phdr maps the stub (instructions, data, bss) rwx. // The second Phdr maps the overlay r--, // to defend against /usr/bin/strip removing the overlay. - Elf_LE32_Phdr *const phdro = 1+(Elf_LE32_Phdr *)(1+ehdr); + Elf_LE32_Phdr *const phdro = 1+phdr; phdro->p_type = PT_LOAD; phdro->p_offset = lsize; @@ -282,8 +412,6 @@ void PackLinuxI386::updateLoader(OutputFile *fo) phdro->p_align = (unsigned) (-PAGE_MASK); patchLoaderChecksum(); - fo->seek(0, SEEK_SET); - fo->rewrite(loader, 0x80); #undef PAGE_MASK } diff --git a/src/p_lx_exc.h b/src/p_lx_exc.h index f9e37da1..2f38360d 100644 --- a/src/p_lx_exc.h +++ b/src/p_lx_exc.h @@ -42,14 +42,28 @@ public: virtual int getFormat() const { return UPX_F_LINUX_i386; } virtual const char *getName() const { return "linux/386"; } virtual const int *getCompressionMethods(int method, int level) const; + virtual const int *getFilters() const; + virtual int buildLoader(const Filter *); virtual bool canPack(); protected: // loader util - virtual const upx_byte *getLoader() const; - virtual int getLoaderSize() const; virtual int getLoaderPrefixSize() const; + virtual int buildLinuxLoader( + upx_byte const *const proto, // assembly-only sections + unsigned const szproto, + upx_byte const *const fold, // linked assembly + C section + unsigned const szfold, + Filter const *ft, + unsigned const brka + ); + + struct cprElfhdr { + struct Elf_LE32_Ehdr ehdr; + struct Elf_LE32_Phdr phdr[2]; + struct PackUnix::l_info linfo; + }; // patch util virtual void patchLoader(); @@ -63,6 +77,8 @@ protected: enum { UPX_ELF_MAGIC = 0x5850557f // "\x7fUPX" }; + + unsigned n_mru; }; diff --git a/src/p_lx_sh.cpp b/src/p_lx_sh.cpp index 22f9f0df..195de3a7 100644 --- a/src/p_lx_sh.cpp +++ b/src/p_lx_sh.cpp @@ -30,6 +30,7 @@ #include "conf.h" #include "file.h" +#include "filter.h" #include "packer.h" #include "p_elf.h" #include "p_unix.h" @@ -44,9 +45,9 @@ **************************************************************************/ static const -#include "stub/l_sh_n2b.h" +#include "stub/l_lx_sh86.h" static const -#include "stub/l_sh_n2d.h" +#include "stub/fold_sh86.h" PackLinuxI386sh::PackLinuxI386sh(InputFile *f) : @@ -58,76 +59,35 @@ PackLinuxI386sh::~PackLinuxI386sh() { } -const upx_byte *PackLinuxI386sh::getLoader() const +int +PackLinuxI386sh::buildLoader(Filter const *ft) { - if (M_IS_NRV2B(ph.method)) - return linux_i386sh_nrv2b_loader; - if (M_IS_NRV2D(ph.method)) - return linux_i386sh_nrv2d_loader; - return NULL; -} + unsigned const sz_fold = sizeof(linux_i386sh_fold); + MemBuffer buf(sz_fold); + memcpy(buf, linux_i386sh_fold, sz_fold); -int PackLinuxI386sh::getLoaderSize() const -{ - if (0 != lsize) { - return lsize; - } - if (M_IS_NRV2B(ph.method)) - return sizeof(linux_i386sh_nrv2b_loader); - if (M_IS_NRV2D(ph.method)) - return sizeof(linux_i386sh_nrv2d_loader); - return 0; -} + checkPatch(0,0,0,0); // reset + patch_le32(buf,sz_fold,"UPX3",l_shname); + patch_le32(buf,sz_fold,"UPX2",o_shname); + // get fresh filter + Filter fold_ft = *ft; + fold_ft.init(ft->id, ft->addvalue); + int preferred_ctos[2] = {ft->cto, -1}; + fold_ft.preferred_ctos = preferred_ctos; + + // filter + optimizeFilter(&fold_ft, buf, sz_fold); + bool success = fold_ft.filter(buf + sizeof(cprElfhdr), sz_fold - sizeof(cprElfhdr)); + (void)success; + + return buildLinuxLoader( + linux_i386sh_loader, sizeof(linux_i386sh_loader), + buf, sz_fold, ft, 0x08048000 ); +} void PackLinuxI386sh::patchLoader() { - lsize = getLoaderSize(); - Elf_LE32_Ehdr *const ehdr = (Elf_LE32_Ehdr *)(void *)loader; - Elf_LE32_Phdr *const phdr = (Elf_LE32_Phdr *)(1+ehdr); - - patch_le32(loader,lsize,"UPX3",l_shname); - patch_le32(loader,lsize,"UPX2",o_shname); - - // stub/scripts/setfold.pl puts address of 'fold_begin' in phdr[1].p_offset - off_t const fold_begin = phdr[1].p_offset; - assert(fold_begin > 0); - assert(fold_begin < (off_t)lsize); - MemBuffer cprLoader(lsize); - - // compress compiled C-code portion of loader - upx_uint const uncLsize = lsize - fold_begin; - upx_uint cprLsize; - int r = upx_compress(loader + fold_begin, uncLsize, cprLoader, &cprLsize, - NULL, ph.method, 10, NULL, NULL); - if (r != UPX_E_OK || cprLsize >= uncLsize) - throwInternalError("loaded compression failed"); - - set_le32(0+fold_begin+loader, uncLsize); - set_le32(4+fold_begin+loader, cprLsize); - memcpy( 8+fold_begin+loader, cprLoader, cprLsize); - lsize = 8 + fold_begin + cprLsize; - patchVersion(loader,lsize); - - // Info for OS kernel to set the brk() - unsigned const brka = getbrk(phdr, ehdr->e_phnum); - phdr[1].p_offset = 0xfff&brka; - phdr[1].p_vaddr = brka; - phdr[1].p_paddr = brka; - phdr[1].p_filesz = 0; - phdr[1].p_memsz = 0; - - // The beginning of our loader consists of a elf_hdr (52 bytes) and - // two sections elf_phdr (2 * 32 byte), so we have 12 free bytes - // from offset 116 to the program start at offset 128. - assert(ehdr->e_phoff == sizeof(Elf_LE32_Ehdr)); - assert(ehdr->e_ehsize == sizeof(Elf_LE32_Ehdr)); - assert(ehdr->e_phentsize == sizeof(Elf_LE32_Phdr)); - assert(ehdr->e_phnum == 2); - assert(ehdr->e_shnum == 0); - assert(lsize > 128 && lsize < 4096); - - patchLoaderChecksum(); } @@ -181,7 +141,8 @@ void PackLinuxI386sh::pack(OutputFile *fo) PackUnix::pack(fo); // update loader - Elf_LE32_Phdr *const phdro = (Elf_LE32_Phdr *)(sizeof(Elf_LE32_Ehdr)+loader); + Elf_LE32_Ehdr *const ehdr = (Elf_LE32_Ehdr *)const_cast(getLoader()); + Elf_LE32_Phdr *const phdro = (Elf_LE32_Phdr *)(1+ehdr); off_t const totlen = fo->getBytesWritten(); phdro[0].p_filesz = totlen; phdro[0].p_memsz = PAGE_MASK & (~PAGE_MASK + totlen); @@ -196,7 +157,7 @@ void PackLinuxI386sh::pack(OutputFile *fo) patchLoaderChecksum(); fo->seek(0, SEEK_SET); - fo->rewrite(loader, 0x80); + fo->rewrite(ehdr, sizeof(cprElfhdr)); #undef PAGE_MASK } diff --git a/src/p_lx_sh.h b/src/p_lx_sh.h index 90f95188..c56ca94d 100644 --- a/src/p_lx_sh.h +++ b/src/p_lx_sh.h @@ -45,16 +45,16 @@ public: virtual int getFormat() const { return UPX_F_LINUX_SH_i386; } virtual const char *getName() const { return "linux/sh386"; } virtual const int *getFilters() const { return NULL; } + virtual int buildLoader(const Filter *); + + virtual void pack(OutputFile *fo); virtual bool canPack(); - virtual void pack(OutputFile *fo); // virtual void unpack(OutputFile *fo) { super::unpack(fo); } virtual bool canUnpackVersion(int version) const { return (version >= 11); } protected: - virtual const upx_byte *getLoader() const; - virtual int getLoaderSize() const; virtual bool getShellName(char *buf); virtual void patchLoader(); diff --git a/src/p_unix.cpp b/src/p_unix.cpp index 823b744a..e90931b2 100644 --- a/src/p_unix.cpp +++ b/src/p_unix.cpp @@ -29,6 +29,7 @@ #include "conf.h" #include "file.h" +#include "filter.h" #include "packer.h" #include "p_unix.h" #include "p_elf.h" @@ -125,22 +126,16 @@ void PackUnix::pack(OutputFile *fo) // create a pseudo-unique program id for our paranoid stub progid = getRandomId(); - // prepare loader - lsize = getLoaderSize(); - loader.alloc(lsize + sizeof(p_info)); - memcpy(loader,getLoader(),lsize); - - // patch loader, prepare header info - patchLoader(); // can change lsize by packing C-code of upx_main etc. - p_info *const hbuf = (p_info *)(loader + lsize); - set_native32(&hbuf->p_progid, progid); - set_native32(&hbuf->p_filesize, file_size); - set_native32(&hbuf->p_blocksize, blocksize); - fo->write(loader, lsize + sizeof(p_info)); - // init compression buffers ibuf.alloc(blocksize); obuf.allocForCompression(blocksize); + { + p_info hbuf; + set_native32(&hbuf.p_progid, progid); + set_native32(&hbuf.p_filesize, file_size); + set_native32(&hbuf.p_blocksize, blocksize); + obuf.write(&hbuf, sizeof(hbuf)); + } // compress blocks unsigned total_in = 0; @@ -148,6 +143,10 @@ void PackUnix::pack(OutputFile *fo) ui_total_passes = (file_size + blocksize - 1) / blocksize; if (ui_total_passes == 1) ui_total_passes = 0; + + Filter ft(ph.level); + ft.addvalue = 0; + fi->seek(0, SEEK_SET); for (;;) { @@ -159,9 +158,19 @@ void PackUnix::pack(OutputFile *fo) // file is e.g. blocksize + 1 bytes long // compress - ph.u_len = l; + ph.c_len = ph.u_len = l; ph.overlap_overhead = 0; - (void) compress(ibuf, obuf); // ignore return value + ft.buf_len = l; + + // leave room for block sizes + unsigned char size[8]; + set_native32(size+0, ph.u_len); + set_native32(size+4, ph.c_len); // will be rewritten + obuf.write(size, 8); + + // If user specified the filter, then use it (-2==strategy). + // Else try the first two filters, and pick the better (2==strategy). + compressWithFilters(&ft, OVERHEAD, ((opt->filter > 0) ? -2 : 2)); if (ph.c_len < ph.u_len) { @@ -177,20 +186,18 @@ void PackUnix::pack(OutputFile *fo) ph.c_adler = upx_adler32(ph.c_adler, ibuf, ph.u_len); } - // write block sizes - unsigned char size[8]; - set_native32(size+0, ph.u_len); - set_native32(size+4, ph.c_len); - fo->write(size, 8); + // update compressed size + set_native32(obuf - 4, ph.c_len); // write compressed data if (ph.c_len < ph.u_len) { - fo->write(obuf, ph.c_len); - verifyOverlappingDecompression(); + obuf.write(obuf, ph.c_len); + // FIXME: obuf is not discardable! + // verifyOverlappingDecompression(); } else - fo->write(ibuf, ph.u_len); + obuf.write(ibuf, ph.u_len); total_in += ph.u_len; total_out += ph.c_len; @@ -199,12 +206,21 @@ void PackUnix::pack(OutputFile *fo) throwEOFException(); // write block end marker (uncompressed size 0) - fo->write("\x00\x00\x00\x00", 4); + set_native32(obuf, 0); + obuf.write(obuf, 4); // update header with totals ph.u_len = total_in; ph.c_len = total_out; + upx_byte const *p = getLoader(); + lsize = getLoaderSize(); + patchFilter32(const_cast(p), lsize, &ft); + fo->write(p, lsize); + + unsigned pos = obuf.seek(0, SEEK_CUR); + fo->write(obuf - pos, pos); + // write packheader writePackHeader(fo); @@ -213,6 +229,8 @@ void PackUnix::pack(OutputFile *fo) fo->write(obuf, 4); updateLoader(fo); + fo->seek(0, SEEK_SET); + fo->rewrite(p, lsize); // finally check the compression ratio if (!checkFinalCompressionRatio(fo)) diff --git a/src/packer.cpp b/src/packer.cpp index 9b4e1d89..187c8785 100644 --- a/src/packer.cpp +++ b/src/packer.cpp @@ -1042,29 +1042,259 @@ const char *Packer::getDecompressor() const } +#define NOFILT 0 // no filter +#define FNOMRU 1 // filter, but not using mru +#define MRUFLT 2 // mru filter + +static unsigned +f80_call(int fid) +{ + return (1+ (0x0f & fid)) % 3; +} + +static unsigned +f80_jmp1(int fid) +{ + return ((1+ (0x0f & fid)) / 3) % 3; +} + +static unsigned +f80_jcc2(int fid) +{ + return f80_jmp1(fid); +} + void Packer::addFilter32(int filter_id) { assert(filter_id > 0); assert(isValidFilter(filter_id)); - if ((filter_id & 0xf) % 3 == 0) - addLoader("CALLTR00", - (filter_id > 0x20) ? "CTCLEVE1" : "", - "CALLTR01", - (filter_id & 0xf) > 3 ? (filter_id > 0x20 ? "CTBSHR01""CTBSWA01" : "CTBROR01""CTBSWA01") : "", - "CALLTR02", - NULL - ); - else - addLoader("CALLTR10", - (filter_id & 0xf) % 3 == 1 ? "CALLTRE8" : "CALLTRE9", - "CALLTR11", - (filter_id > 0x20) ? "CTCLEVE2" : "", - "CALLTR12", - (filter_id & 0xf) > 3 ? (filter_id > 0x20 ? "CTBSHR11""CTBSWA11" : "CTBROR11""CTBSWA11") : "", - "CALLTR13", - NULL - ); + if (filter_id < 0x80) { + if ((filter_id & 0xf) % 3 == 0) { + if (filter_id < 0x40) { + addLoader("CALLTR00", + (filter_id > 0x20) ? "CTCLEVE1" : "", + "CALLTR01", + (filter_id & 0xf) > 3 ? (filter_id > 0x20 ? "CTBSHR01""CTBSWA01" : "CTBROR01""CTBSWA01") : "", + "CALLTR02", + NULL + ); + } + else if (0x40==(0xF0 & filter_id)) { + addLoader("CKLLTR00", 0); + if (9<=(0xf & filter_id)) { + addLoader("CKLLTR10", 0); + } + addLoader("CKLLTR20", 0); + if (9<=(0xf & filter_id)) { + addLoader("CKLLTR30", 0); + } + addLoader("CKLLTR40", 0); + } + } + else + addLoader("CALLTR10", + (filter_id & 0xf) % 3 == 1 ? "CALLTRE8" : "CALLTRE9", + "CALLTR11", + (filter_id > 0x20) ? "CTCLEVE2" : "", + "CALLTR12", + (filter_id & 0xf) > 3 ? (filter_id > 0x20 ? "CTBSHR11""CTBSWA11" : "CTBROR11""CTBSWA11") : "", + "CALLTR13", + NULL + ); + } + if (0x80==(filter_id & 0xF0)) { + bool const x386 = (opt->cpu <= opt->CPU_386); + unsigned const n_mru = ph.n_mru ? 1+ ph.n_mru : 0; + bool const mrupwr2 = (0!=n_mru) && 0==((n_mru -1) & n_mru); + unsigned const f_call = f80_call(filter_id); + unsigned const f_jmp1 = f80_jmp1(filter_id); + unsigned const f_jcc2 = f80_jcc2(filter_id); + + if (NOFILT!=f_jcc2) { + addLoader("LXJCC010", 0); + if (n_mru) { + addLoader("LXMRU045", 0); + } + else { + addLoader("LXMRU046", 0); + } + if (0==n_mru || MRUFLT!=f_jcc2) { + addLoader("LXJCC020", 0); + } + else { // 0!=n_mru + addLoader("LXJCC021", 0); + } + if (NOFILT!=f_jcc2) { + addLoader("LXJCC023", 0); + } + } + addLoader("LXUNF037", 0); + if (x386) { + if (n_mru) { + addLoader("LXUNF386", 0); + } + addLoader("LXUNF387", 0); + if (n_mru) { + addLoader("LXUNF388", 0); + } + } + else { + addLoader("LXUNF486", 0); + if (n_mru) { + addLoader("LXUNF487", 0); + } + } + if (n_mru) { + addLoader("LXMRU065", 0); + if (256==n_mru) { + addLoader("MRUBYTE3", 0); + } + else { + addLoader("MRUARB30", 0); + if (mrupwr2) { + addLoader("MRUBITS3", 0); + } + else { + addLoader("MRUARB40", 0); + } + } + addLoader("LXMRU070", 0); + if (256==n_mru) { + addLoader("MRUBYTE4", 0); + } + else if (mrupwr2) { + addLoader("MRUBITS4", 0); + } + else { + addLoader("MRUARB50", 0); + } + addLoader("LXMRU080", 0); + if (256==n_mru) { + addLoader("MRUBYTE5", 0); + } + else { + addLoader("MRUARB60", 0); + if (mrupwr2) { + addLoader("MRUBITS5", 0); + } + else { + addLoader("MRUARB70", 0); + } + } + addLoader("LXMRU090", 0); + if (256==n_mru) { + addLoader("MRUBYTE6", 0); + } + else { + addLoader("MRUARB80", 0); + if (mrupwr2) { + addLoader("MRUBITS6", 0); + } + else { + addLoader("MRUARB90", 0); + } + } + addLoader("LXMRU100", 0); + } + addLoader("LXUNF040", 0); + if (n_mru) { + addLoader("LXMRU110", 0); + } + else { + addLoader("LXMRU111", 0); + } + addLoader("LXUNF041", 0); + addLoader("LXUNF042", 0); + if (n_mru) { + addLoader("LXMRU010", 0); + if (NOFILT!=f_jmp1 && NOFILT==f_call) { + addLoader("LXJMPA00", 0); + } + else { + addLoader("LXCALLB0", 0); + } + addLoader("LXUNF021", 0); + } + else { + addLoader("LXMRU022", 0); + if (NOFILT!=f_jmp1 && NOFILT==f_call) { + addLoader("LXJMPA01", 0); + } + else { + addLoader("LXCALLB1", 0); + } + } + if (n_mru) { + if (256!=n_mru && mrupwr2) { + addLoader("MRUBITS1", 0); + } + addLoader("LXMRU030", 0); + if (256==n_mru) { + addLoader("MRUBYTE1", 0); + } + else { + addLoader("MRUARB10", 0); + } + addLoader("LXMRU040", 0); + } + + addLoader("LXUNF030", 0); + if (NOFILT!=f_jcc2) { + addLoader("LXJCC000", 0); + } + if (NOFILT!=f_call || NOFILT!=f_jmp1) { // at least one is filtered + // shift opcode origin to zero + if (0==n_mru) { + addLoader("LXCJ0MRU", 0); + } + else { + addLoader("LXCJ1MRU", 0); + } + + // determine if in range + if ((NOFILT!=f_call) && (NOFILT!=f_jmp1)) { // unfilter both + addLoader("LXCALJMP", 0); + } + if ((NOFILT==f_call) ^ (NOFILT==f_jmp1)) { // unfilter just one + if (0==n_mru) { + addLoader("LXCALL00", 0); + } + else { + addLoader("LXCALL01", 0); + } + } + + // determine if mru applies + if (0==n_mru || ! ((FNOMRU==f_call) || (FNOMRU==f_jmp1)) ) { + addLoader("LXCJ2MRU", 0); // no mru, or no exceptions + } + else { // mru on one, but not the other + addLoader("LXCJ4MRU", 0); + if (MRUFLT==f_jmp1) { // JMP only + addLoader("LXCJ6MRU", 0); + } else + if (MRUFLT==f_call) { // CALL only + addLoader("LXCJ7MRU", 0); + } + addLoader("LXCJ8MRU", 0); + } + } + addLoader("LXUNF034", 0); + if (n_mru) { + addLoader("LXMRU055", 0); + if (256==n_mru) { + addLoader("MRUBYTE2", 0); + } + else if (mrupwr2) { + addLoader("MRUBITS2", 0); + } + else if (n_mru) { + addLoader("MRUARB20", 0); + } + addLoader("LXMRU057", 0); + } + } } @@ -1222,6 +1452,10 @@ void Packer::compressWithFilters(Filter *parm_ft, ft.init(ph.filter, orig_ft.addvalue); // filter optimizeFilter(&ft, ibuf + filter_off, filter_len); + + unsigned char *const save = new unsigned char[filter_len]; + memcpy(save, ibuf + filter_off, filter_len); + bool success = ft.filter(ibuf + filter_off, filter_len); if (ft.id != 0 && ft.calls == 0) { @@ -1251,6 +1485,7 @@ void Packer::compressWithFilters(Filter *parm_ft, } nfilters_success++; ph.filter_cto = ft.cto; + ph.n_mru = ft.n_mru; // compress if (compress(ibuf + compress_buf_off, otemp, max_offset, max_match)) { @@ -1294,6 +1529,11 @@ void Packer::compressWithFilters(Filter *parm_ft, } // restore ibuf[] - unfilter with verify ft.unfilter(ibuf + filter_off, filter_len, true); + for (unsigned k = 0; k < filter_len; ++k) { + if ((ibuf + filter_off)[k] != save[k]) { + printf("mismatch at %d\n", k); + } + } // if (strategy < 0) break; @@ -1305,6 +1545,7 @@ void Packer::compressWithFilters(Filter *parm_ft, assert(best_ph.u_len == orig_ph.u_len); assert(best_ph.filter == best_ft.id); assert(best_ph.filter_cto == best_ft.cto); + // FIXME assert(best_ph.n_mru == best_ft.n_mru); // copy back results this->ph = best_ph; @@ -1396,10 +1637,27 @@ bool Packer::patchFilter32(void *loader, int lsize, const Filter *ft) if (ft->id == 0) return false; assert(ft->calls > 0); - if (ft->id > 0x20) - patch_le16(loader, lsize, "??", '?' + (ft->cto << 8)); - patch_le32(loader, lsize, "TEXL", (ft->id & 0xf) % 3 == 0 ? ft->calls : - ft->lastcall - ft->calls * 4); + if (ft->id < 0x80) { + if (0x40 <= ft->id && ft->id < 0x50 && UPX_F_LINUX_i386==ph.format) { + // "push byte '?'" + patch_le16(loader, lsize, "\x6a?", 0x6a + (ft->cto << 8)); + } + if (0x20 <= ft->id && ft->id < 0x40) { + // 077==modr/m of "cmp [edi], byte '?'" (compare immediate 8 bits) + patch_le16(loader, lsize, "\077?", 077 + (ft->cto << 8)); + } + if (ft->id < 0x40) { + patch_le32(loader, lsize, "TEXL", (ft->id & 0xf) % 3 == 0 ? ft->calls : + ft->lastcall - ft->calls * 4); + } + } + if (0x80==(ft->id & 0xF0)) { + int const mru = ph.n_mru ? 1+ ph.n_mru : 0; + if (mru && mru!=256) { + unsigned const is_pwr2 = (0==((mru -1) & mru)); + patch_le32(0x80 + (char *)loader, lsize - 0x80, "NMRU", mru - is_pwr2); + } + } return true; } diff --git a/src/packer.h b/src/packer.h index c7002b6b..b771c32b 100644 --- a/src/packer.h +++ b/src/packer.h @@ -74,6 +74,7 @@ public: int filter; int filter_cto; int header_checksum; + int n_mru; // unsigned saved_u_adler; @@ -250,15 +251,15 @@ protected: int ph_version; // compression buffers - MemBuffer ibuf; // input - MemBuffer obuf; // output + MemBuffer ibuf; // input + MemBufferIO obuf; // output // UI handler UiPacker *uip; int ui_pass; int ui_total_passes; -private: +protected: // linker Linker *linker; diff --git a/src/packhead.cpp b/src/packhead.cpp index 04cd51be..ed12a4b2 100644 --- a/src/packhead.cpp +++ b/src/packhead.cpp @@ -150,7 +150,8 @@ void PackHeader::putPackHeader(upx_bytep p) set_le32(p+24,u_file_size); p[28] = (unsigned char) filter; p[29] = (unsigned char) filter_cto; - p[30] = 0; + p[30] = (unsigned char) (n_mru ? 0 : n_mru -1); + size = 32; } set_le32(p+8,u_adler); set_le32(p+12,c_adler); @@ -166,7 +167,8 @@ void PackHeader::putPackHeader(upx_bytep p) set_be32(p+24,u_file_size); p[28] = (unsigned char) filter; p[29] = (unsigned char) filter_cto; - p[30] = 0; + p[30] = (unsigned char) (n_mru ? 0 : n_mru -1); + size = 32; } p[4] = (unsigned char) version; @@ -246,6 +248,7 @@ bool PackHeader::fillPackHeader(const upx_bytep buf, int blen) u_file_size = get_le32(p+24); off_filter = 28; filter_cto = p[29]; + n_mru = p[30] ? 1+ p[30] : 0; } } else @@ -257,6 +260,7 @@ bool PackHeader::fillPackHeader(const upx_bytep buf, int blen) u_file_size = get_be32(p+24); off_filter = 28; filter_cto = p[29]; + n_mru = p[30] ? 1+ p[30] : 0; } if (version >= 10) diff --git a/src/stub/Makefile b/src/stub/Makefile index 650a9851..9337a02f 100644 --- a/src/stub/Makefile +++ b/src/stub/Makefile @@ -31,9 +31,10 @@ STUBS = \ l_tmt.h \ l_wcle.h \ l_w32pe.h \ - l_lx_n2b.h l_lx_n2d.h \ - l_le_n2b.h l_le_n2d.h \ - l_sh_n2b.h l_sh_n2d.h \ + l_lx_exec86.h fold_exec86.h \ + l_lx_elf86.h fold_elf86.h \ + l_lx_sh86.h fold_sh86.h \ + l_ext2.h \ l_vmlinz.h # experimental: @@ -80,7 +81,7 @@ STRIPELF = ./util/sstrip/sstrip # Use gcc 2.95.2 for smallest code. CC_LINUX_CFLAGS = -Wall -W -Wcast-align -Wcast-qual -Wwrite-strings -CC_LINUX_CFLAGS += -Werror +###CC_LINUX_CFLAGS += -Werror CC_LINUX_CFLAGS += -funsigned-char ###CC_LINUX_CFLAGS += -fwritable-strings -save-temps CC_LINUX = gcc272 -O2 -m386 -malign-functions=0 -malign-jumps=0 -malign-loops=0 $(CC_LINUX_CFLAGS) @@ -170,6 +171,10 @@ l_tmt.h: l_tmt.asx $(NASM) -f bin -o $T.bin $< $(BIN2H) $T.bin nrv_loader $@ +l_ext2.h: l_ext2.asx + $(NASM) -f bin -o $T.bin $< + $(BIN2H) $T.bin nrv_loader $@ + l_vmlinz.h: l_vmlinz.asx $(NASM) -f bin -o $T.bin $< $(BIN2H) $T.bin nrv_loader $@ @@ -249,66 +254,56 @@ l_t_x2ds.h: l_tos2.s # // linux rules (exec, elf, sh, sep) # ************************************************************************/ -l_lx_n2b.h: l_lx_exec.c l_xe_n2b.o l_lx_exec86.lds Makefile - $(CC_LINUX) -DNRV2B -o $T.o -c $< - ld -T $(srcdir)/l_lx_exec86.lds -Map $T.map -o $T.bin l_xe_n2b.o $T.o - $(SETFOLD) $T.bin +l_lx_elf86.h: l_lx_elf86.asx + $(NASM) -f bin -o $T.bin $< + $(BIN2H) $T.bin linux_i386elf_loader $@ + +l_lx_exec86.h: l_lx_exec86.asx + $(NASM) -f bin -o $T.bin $< + $(BIN2H) $T.bin linux_i386exec_loader $@ + +l_lx_sh86.h: l_lx_sh86.asx + $(NASM) -f bin -o $T.bin $< + $(BIN2H) $T.bin linux_i386sh_loader $@ + +l_lx_elf.o: l_lx_elf.c + $(CC_LINUX) -c $< + +fold_elf86.o: fold_elf86.asm + $(NASM) -f elf -o $@ $< + +fold_elf86.h: l_lx_elf.o fold_elf86.o l_lx_elf86.lds + ld -T l_lx_elf86.lds -Map $T.map -o $T.bin $T.o l_lx_elf.o + objcopy -S -R .comment -R .note $T.bin + ./util/sstrip/sstrip $T.bin $(BRANDELF) $T.bin - $(BIN2H) $T.bin linux_i386exec_nrv2b_loader $@ + $(BIN2H) $T.bin linux_i386elf_fold $@ -l_le_n2b.h: l_lx_elf.c l_6e_n2b.o l_lx_elf86.lds - $(CC_LINUX) -DNRV2B -o $T.o -c $< - ld -T $(srcdir)/l_lx_elf86.lds -Map $T.map -o $T.bin l_6e_n2b.o $T.o - $(SETFOLD) $T.bin +l_lx_exec.o: l_lx_exec.c + $(CC_LINUX) -c $< + +fold_exec86.o: fold_exec86.asm + $(NASM) -f elf -o $@ $< + +fold_exec86.h: l_lx_exec.o fold_exec86.o l_lx_exec86.lds + ld -T l_lx_exec86.lds -Map $T.map -o $T.bin $T.o l_lx_exec.o + objcopy -S -R .comment -R .note $T.bin + ./util/sstrip/sstrip $T.bin $(BRANDELF) $T.bin - $(BIN2H) $T.bin linux_i386elf_nrv2b_loader $@ + $(BIN2H) $T.bin linux_i386exec_fold $@ -l_sh_n2b.h: l_lx_sh.c l_6h_n2b.o l_lx_sh86.lds - $(CC_LINUX) -DNRV2B -o $T.o -c $< - ld -T $(srcdir)/l_lx_sh86.lds -Map $T.map -o $T.bin l_6h_n2b.o $T.o - $(SETFOLD) $T.bin +l_lx_sh.o: l_lx_sh.c + $(CC_LINUX) -c $< + +fold_sh86.o: fold_sh86.asm + $(NASM) -f elf -o $@ $< + +fold_sh86.h: l_lx_sh.o fold_sh86.o l_lx_sh86.lds + ld -T l_lx_sh86.lds -Map $T.map -o $T.bin $T.o l_lx_sh.o + objcopy -S -R .comment -R .note $T.bin + ./util/sstrip/sstrip $T.bin $(BRANDELF) $T.bin - $(BIN2H) $T.bin linux_i386sh_nrv2b_loader $@ - -l_xe_n2b.o: l_lx_exec86.asm - $(NASM) -i$(UCL_I386)/ -f elf -dNRV2B -o $@ $< - -l_6e_n2b.o: l_lx_elf86.asm - $(NASM) -i$(UCL_I386)/ -f elf -dNRV2B -o $@ $< - -l_6h_n2b.o: l_lx_sh86.asm - $(NASM) -i$(UCL_I386)/ -f elf -dNRV2B -o $@ $< - - -l_lx_n2d.h: l_lx_exec.c l_xe_n2d.o l_lx_exec86.lds - $(CC_LINUX) -DNRV2D -o $T.o -c $< - ld -T $(srcdir)/l_lx_exec86.lds -Map $T.map -o $T.bin l_xe_n2d.o $T.o - $(SETFOLD) $T.bin - $(BRANDELF) $T.bin - $(BIN2H) $T.bin linux_i386exec_nrv2d_loader $@ - -l_le_n2d.h: l_lx_elf.c l_6e_n2d.o l_lx_elf86.lds - $(CC_LINUX) -DNRV2D -o $T.o -c $< - ld -T $(srcdir)/l_lx_elf86.lds -Map $T.map -o $T.bin l_6e_n2d.o $T.o - $(SETFOLD) $T.bin - $(BRANDELF) $T.bin - $(BIN2H) $T.bin linux_i386elf_nrv2d_loader $@ - -l_sh_n2d.h: l_lx_sh.c l_6h_n2d.o l_lx_sh86.lds - $(CC_LINUX) -DNRV2D -o $T.o -c $< - ld -T $(srcdir)/l_lx_sh86.lds -Map $T.map -o $T.bin l_6h_n2d.o $T.o - $(SETFOLD) $T.bin - $(BRANDELF) $T.bin - $(BIN2H) $T.bin linux_i386sh_nrv2d_loader $@ - -l_xe_n2d.o: l_lx_exec86.asm - $(NASM) -i$(UCL_I386)/ -f elf -dNRV2D -o $@ $< - -l_6e_n2d.o: l_lx_elf86.asm - $(NASM) -i$(UCL_I386)/ -f elf -dNRV2D -o $@ $< - -l_6h_n2d.o: l_lx_sh86.asm - $(NASM) -i$(UCL_I386)/ -f elf -dNRV2D -o $@ $< + $(BIN2H) $T.bin linux_i386sh_fold $@ l_lx_sep.o: l_lx_sep.c $(CC_LINUX) -c $< @@ -344,25 +339,16 @@ l_t_n2bs.h: n2b_d.ash bits.ash $(DEPS1) l_t_n2d.h: n2d_d.ash bits.ash $(DEPS1) l_t_n2ds.h: n2d_d.ash bits.ash $(DEPS1) l_tmt.h: n2b_d32.asy n2d_d32.asy $(DEPS2) +l_ext2.h: n2b_d32.asy n2d_d32.asy $(DEPS2) l_vmlinz.h: n2b_d32.asy n2d_d32.asy $(DEPS2) l_vxd.h: n2b_d32.asy n2d_d32.asy $(DEPS2) l_wcle.h: n2b_d32.asy n2d_d32.asy $(DEPS2) l_w32pe.h: n2b_d32.asy n2d_d32.asy $(DEPS2) -l_xe_n2b.o: n2b_d32.ash $(DEPS1) -l_6e_n2b.o: n2b_d32.ash $(DEPS1) -l_6h_n2b.o: n2b_d32.ash $(DEPS1) +l_lx_elf86.asx: l_lx_elf86.asm macros.ash macros.asy +l_lx_exec86.asx: l_lx_exec86.asm macros.ash macros.asy +l_lx_sh86.asx: l_lx_sh86.asm macros.ash macros.asy -l_xe_n2d.o: n2d_d32.ash $(DEPS1) -l_6e_n2d.o: n2d_d32.ash $(DEPS1) -l_6h_n2d.o: n2d_d32.ash $(DEPS1) - -l_lx_n2b.h: linux.hh -l_lx_n2d.h: linux.hh -l_le_n2b.h: linux.hh -l_le_n2d.h: linux.hh -l_sh_n2b.h: linux.hh -l_sh_n2d.h: linux.hh upxb: linux.hh upxd: linux.hh diff --git a/src/stub/fold_elf86.asm b/src/stub/fold_elf86.asm new file mode 100644 index 00000000..702b52a9 --- /dev/null +++ b/src/stub/fold_elf86.asm @@ -0,0 +1,127 @@ +; fold_elf86.asm -- linkage to C code to process Elf binary +; +; This file is part of the UPX executable compressor. +; +; Copyright (C) 2000-2001 John F. Reiser +; All Rights Reserved. +; +; UPX and the UCL library are free software; you can redistribute them +; and/or modify them under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of +; the License, or (at your option) any later version. +; +; This program is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; see the file COPYING. +; If not, write to the Free Software Foundation, Inc., +; 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +; +; Markus F.X.J. Oberhumer Laszlo Molnar John F. Reiser +; markus@oberhumer.com ml1050@cdata.tvnet.hu jreiser@BitWagon.com +; + +%define szElf32_Ehdr 0x34 +%define szElf32_Phdr 8*4 +%define p_memsz 5*4 +%define a_val 4 + +%define __NR_munmap 91 + +;; control just falls through, after this part and compiled C code +;; are uncompressed. + +fold_begin: + ; patchLoader will modify to be + ; dword sz_uncompressed, sz_compressed + ; byte compressed_data... + + pop eax ; discard &sz_uncompressed + pop eax ; discard sz_uncompressed + +; Move argc,argv,envp down so that we can insert more Elf_auxv entries. +; ld-linux.so.2 depends on AT_PHDR and AT_ENTRY, for instance + +%define OVERHEAD 2048 +%define MAX_ELF_HDR 512 + + mov esi, esp + sub esp, byte 6*8 ; AT_PHENT, AT_PHNUM, AT_PAGESZ, AT_ENTRY, AT_PHDR, AT_NULL + mov edi, esp + call do_auxv + + sub esp, dword MAX_ELF_HDR + OVERHEAD + push esp ; argument: temp space + push edi ; argument: AT_next + push ebp ; argument: &decompress + push edx ; argument: my_elfhdr + add edx, [p_memsz + szElf32_Ehdr + edx] + push edx ; argument: uncbuf +EXTERN upx_main + call upx_main ; entry = upx_main(uncbuf, my_elfhdr, &decompress, AT_next, tmp_ehdr) + pop esi ; decompression buffer == (p_vaddr + p_memsz) of stub + pop ebx ; my_elfhdr + add esp, dword 3*4 + MAX_ELF_HDR + OVERHEAD ; remove 3 params, temp space + push eax ; save entry address + + mov edi, [a_val + edi] ; AT_PHDR +find_hatch: + push edi +EXTERN make_hatch + call make_hatch ; find hatch = make_hatch(phdr) + pop ecx ; junk the parameter + add edi, byte szElf32_Phdr ; prepare to try next Elf32_Phdr + test eax,eax + jz find_hatch + xchg eax,edx ; edx= &hatch + +; _dl_start and company (ld-linux.so.2) assumes that it has virgin stack, +; and does not initialize all its stack local variables to zero. +; Ulrich Drepper (drepper@cyngus.com) has refused to fix the bugs. +; See GNU wwwgnats libc/1165 . + +%define N_STKCLR (0x100 + MAX_ELF_HDR + OVERHEAD)/4 + lea edi, [esp - 4*N_STKCLR] + pusha ; values will be zeroed + mov ecx, N_STKCLR + xor eax,eax + rep stosd + + mov ecx,esi ; my p_vaddr + p_memsz + mov bh,0 ; round down to 64KB boundary + sub ecx,ebx ; length to unmap + push byte __NR_munmap + pop eax + jmp edx ; unmap ourselves via escape hatch, then goto entry + +do_auxv: ; entry: %esi=src = &argc; %edi=dst. exit: %edi= &AT_NULL + ; cld + +L10: ; move argc+argv + lodsd + stosd + test eax,eax + jne L10 + +L20: ; move envp + lodsd + stosd + test eax,eax + jne L20 + +L30: ; move existing Elf32_auxv + lodsd + stosd + test eax,eax ; AT_NULL ? + lodsd + stosd + jne L30 + + sub edi, byte 8 ; point to AT_NULL + ret + +; vi:ts=8:et:nowrap + diff --git a/src/stub/fold_exec86.asm b/src/stub/fold_exec86.asm new file mode 100644 index 00000000..8d811462 --- /dev/null +++ b/src/stub/fold_exec86.asm @@ -0,0 +1,46 @@ +; fold_exec86.asm -- linkage to C code to process Elf binary +; +; This file is part of the UPX executable compressor. +; +; Copyright (C) 2000-2001 John F. Reiser +; All Rights Reserved. +; +; UPX and the UCL library are free software; you can redistribute them +; and/or modify them under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of +; the License, or (at your option) any later version. +; +; This program is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; see the file COPYING. +; If not, write to the Free Software Foundation, Inc., +; 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +; +; Markus F.X.J. Oberhumer Laszlo Molnar John F. Reiser +; markus@oberhumer.com ml1050@cdata.tvnet.hu jreiser@BitWagon.com +; + +;; control just falls through, after this part and compiled C code +;; are uncompressed. + +fold_begin: ;; this label is known to the Makefile + pop eax ; discard &dstlen + pop eax ; discard dstlen + + pop eax ; Pop the argument count + mov ecx, esp ; argv starts just at the current stack top + lea edx, [ecx+eax*4+4] ; envp = &argv[argc + 1] + push eax ; Restore the stack + push ebp ; argument: &decompress + push ebx ; argument: &my_elfhdr + push edx ; argument: envp + push ecx ; argument: argv +EXTERN upx_main + call upx_main ; Call the UPX main function + hlt ; Crash if somehow upx_main does return + +; vi:ts=8:et:nowrap diff --git a/src/stub/fold_sh86.asm b/src/stub/fold_sh86.asm new file mode 100644 index 00000000..df43e6d8 --- /dev/null +++ b/src/stub/fold_sh86.asm @@ -0,0 +1,134 @@ +; fold_sh86.asm -- Linux program entry point & decompressor (shell script) +; +; This file is part of the UPX executable compressor. +; +; Copyright (C) 1996-2001 Markus Franz Xaver Johannes Oberhumer +; Copyright (C) 1996-2001 Laszlo Molnar +; Copyright (C) 2000-2001 John F. Reiser +; All Rights Reserved. +; +; UPX and the UCL library are free software; you can redistribute them +; and/or modify them under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of +; the License, or (at your option) any later version. +; +; This program is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; see the file COPYING. +; If not, write to the Free Software Foundation, Inc., +; 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +; +; Markus F.X.J. Oberhumer Laszlo Molnar John F. Reiser +; markus@oberhumer.com ml1050@cdata.tvnet.hu jreiser@BitWagon.com +; + + + BITS 32 + SECTION .text + +fold_begin: + ; patchLoader will modify to be + ; dword sz_uncompressed, sz_compressed + ; byte compressed_data... + + pop eax ; discard &sz_uncompressed + pop eax ; discard sz_uncompressed + +; Move argc,argv,envp down so that we can insert more Elf_auxv entries. +; ld-linux.so.2 depends on AT_PHDR and AT_ENTRY, for instance + +%define OVERHEAD 2048 +%define MAX_ELF_HDR 512 +%define PAGE_SIZE ( 1<<12) + + mov esi, esp + sub esp, byte 6*8 ; AT_PHENT, AT_PHNUM, AT_PAGESZ, AT_ENTRY, AT_PHDR, AT_NULL + mov edi, esp + call do_auxv + + sub esp, dword MAX_ELF_HDR + OVERHEAD + push esp ; argument: temp space + push edi ; argument: AT_next + push ebp ; argument: &decompress + push edx ; argument: my_elfhdr + add ecx, PAGE_SIZE ; uncompressed stub fits in this + push ecx ; argument: uncbuf +EXTERN upx_main + call upx_main ; entry = upx_main(uncbuf, my_elfhdr, &decompress, AT_next, tmp_ehdr) + pop esi ; decompression buffer + pop ebx ; my_elfhdr + add esp, dword 3*4 + MAX_ELF_HDR + OVERHEAD ; remove 3 params, temp space + + pop ecx ; argc + pop edx ; $0 filename, to become argv[0] + push edx ; restore $0 filename + + add esi, byte 3 + inc ecx + push esi ; &uncompressed shell script + sub esi, byte 3 + + mov [esi], word 0x632d ; "-c" + inc ecx + push esi ; "-c" + + inc ecx + push edx ; argv[0] is duplicate of $0 + + push ecx ; new argc + push eax ; save entry address + +; _dl_start and company (ld-linux.so.2) assumes that it has virgin stack, +; and does not initialize all its stack local variables to zero. +; Ulrich Drepper (drepper@cyngus.com) has refused to fix the bugs. +; See GNU wwwgnats libc/1165 . + +%define N_STKCLR (0x100 + MAX_ELF_HDR + OVERHEAD)/4 + lea edi, [esp - 4*N_STKCLR] + pusha ; values will be zeroed + mov ecx, N_STKCLR + xor eax,eax + rep stosd + +; Because the decompressed shell script occupies low memory anyway, +; there isn't much payback to unmapping the compressed script and +; ourselves the stub. We would need a place to put the escape hatch +; "int $0x80; popa; ret", and some kernels do not allow execution +; on the stack. So, we would have to dirty a page of the shell +; or of /lib/ld-linux.so. It's simpler just to omit the unapping. + popa + ret + +do_auxv: ; entry: %esi=src = &argc; %edi=dst. exit: %edi= &AT_NULL + ; cld + +L10: ; move argc+argv + lodsd + stosd + test eax,eax + jne L10 + +L20: ; move envp + lodsd + stosd + test eax,eax + jne L20 + +L30: ; move existing Elf32_auxv + lodsd + stosd + test eax,eax ; AT_NULL ? + lodsd + stosd + jne L30 + + sub edi, byte 8 ; point to AT_NULL + ret + + +; vi:ts=8:et:nowrap + diff --git a/src/stub/l_lx_elf.c b/src/stub/l_lx_elf.c index 859c23a5..b96c5572 100644 --- a/src/stub/l_lx_elf.c +++ b/src/stub/l_lx_elf.c @@ -102,7 +102,11 @@ do_mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) // UPX & NRV stuff **************************************************************************/ -typedef void f_unfilter(nrv_byte *, nrv_uint); // 1st param is also addvalue +typedef void f_unfilter( + nrv_byte *, // also addvalue + nrv_uint, + unsigned cto8 // junk in high 24 bits +); typedef int f_expand( const nrv_byte *, nrv_uint, nrv_byte *, nrv_uint * ); @@ -116,6 +120,7 @@ unpackExtent( ) { while (xo->size) { + unsigned cto8; struct { int32_t sz_unc; // uncompressed int32_t sz_cpr; // compressed @@ -125,6 +130,8 @@ unpackExtent( // Read and check block sizes. xread(xi, (char *)&h, sizeof(h)); + cto8 = h.sz_cpr; + h.sz_cpr >>= 8; if (h.sz_unc == 0) { // uncompressed size 0 -> EOF if (h.sz_cpr != UPX_MAGIC_LE32) // h.sz_cpr must be h->magic err_exit(2); @@ -155,7 +162,7 @@ ERR_LAB && ((512 < out_len) // this block is longer than Ehdr+Phdrs || (xo->size==(unsigned)h.sz_unc) ) // block is last in Extent ) { - (*f_unf)(xo->buf, out_len); + (*f_unf)(xo->buf, out_len, cto8); } xi->buf += h.sz_cpr; xi->size -= h.sz_cpr; @@ -330,14 +337,19 @@ void *upx_main( Elf32_Ehdr *const ehdr // temp char[MAX_ELF_HDR+OVERHEAD] ) { - size_t const lsize = *(unsigned short const *)(0x7c + (char const *)my_ehdr); + struct cprElfhdr { + Elf32_Ehdr ehdr; + Elf32_Phdr phdr[2]; + struct l_info linfo; + }; + size_t const lsize = ((struct cprElfhdr const *)my_ehdr)->linfo.l_lsize; Elf32_Phdr const *phdr = (Elf32_Phdr const *)(1+ehdr); Elf32_Addr entry; struct Extent xo; struct Extent xi = { 0, sizeof(struct p_info) + lsize + CONST_CAST(char *, my_ehdr) }; - size_t const sz_elfhdrs = ((size_t *)xi.buf)[0]; // sizeof(Ehdr+Phdrs), uncompressed - size_t const sz_pckhdrs = ((size_t *)xi.buf)[1]; // sizeof(Ehdr+Phdrs), compressed + size_t const sz_elfhdrs = ((size_t *)xi.buf)[0]; // sizeof(Ehdr+Phdrs), uncompressed + size_t const sz_pckhdrs = ((size_t *)xi.buf)[1]>>8; // sizeof(Ehdr+Phdrs), compressed (void)uncbuf; // used by l_lx_sh.c // Uncompress Ehdr and Phdrs. diff --git a/src/stub/l_lx_elf86.asm b/src/stub/l_lx_elf86.asm index d6283408..f38efc6a 100644 --- a/src/stub/l_lx_elf86.asm +++ b/src/stub/l_lx_elf86.asm @@ -33,42 +33,13 @@ %define jmps jmp short %define jmpn jmp near -; defines for ident.ash and n2b_d32.ash -%ifdef SMALL - %define __IDENTSMA__ - %define __N2BSMA10__ - %define __N2BSMA20__ - %define __N2BSMA30__ - %define __N2BSMA40__ - %define __N2BSMA50__ - %define __N2BSMA60__ - %define __N2DSMA10__ - %define __N2DSMA20__ - %define __N2DSMA30__ - %define __N2DSMA40__ - %define __N2DSMA50__ - %define __N2DSMA60__ -%endif - - - -%include "ident.ash" - -;;;; names of pseudo-sections for addLoader: -;; LXUNFnnn Linux unfilter -;; LXNJMPnn omit filtering of 6-byte Jxx (0x0f 0x80..0x8f) -;; LXMRUnnn MostRecentlyUsed recoding of destinations -;; MRUARBnn arbitrary number of entries in wheel -;; MRUBITSn power of 2 entries in wheel (smaller code) -;; MRUBYTEn 256 entries in wheel (smallest code) - ; /************************************************************************* ; // program entry point ; // see glibc/sysdeps/i386/elf/start.S ; **************************************************************************/ GLOBAL _start - +;__LEXEC000__ _start: ;;;; int3 ;; How to debug this code: Uncomment the 'int3' breakpoint instruction above. @@ -89,27 +60,6 @@ _start: ;; call main ; push address of decompress subroutine decompress: -;__LXUNF000__ - jmps decompr0 - ;; 2+ address of decompress subroutine - ;; unfilter(upx_byte *, length) - pop edx ; return address - pop eax ; upx_byte *, same as addvalue - pop ecx ; length - pusha ; save C-convention ebx, ebp, esi, edi; also eax, edx - xchg eax, edi ; edi= pointer - - push dword ('?'<<8) | 0x0f ; cto8_0f (cto8 byte is modified) -%ifdef __MRUBYTE0__ - xor ebx, ebx ; zero -%else ;__MRUARB00__ (also __MRUBITS0__) - mov ebx, 'NMRU' ; modified N_MRU or N_MRU -1 -%endif ;__LXMRU010__ - - xor edx, edx ; zero -;__LXUNF010__ - jmpn unf0 -;__LXELF010__ ; /************************************************************************* ; // C callable decompressor @@ -120,7 +70,9 @@ decompress: %define OUTP dword [esp+8*4+12] %define OUTS dword [esp+8*4+16] -decompr0: +;__LEXEC009__ + ;; empty section for commonality with l_lx_exec86.asm +;__LEXEC010__ pusha ; cld @@ -129,15 +81,13 @@ decompr0: or ebp, byte -1 ;;; align 8 -%ifdef NRV2B - %include "n2b_d32.ash" -%elifdef NRV2D - %include "n2d_d32.ash" -%else - %error -%endif +%include "n2b_d32.ash" +%include "n2d_d32.ash" +%include "macros.ash" + cjt32 0 +;__LEXEC015__ ; eax is 0 from decompressor code ;xor eax, eax ; return code @@ -158,191 +108,17 @@ decompr0: popa ret -;__LXUNF020__ -;; continuation of entry prolog for unfilter -unf0: -;__LXMRU020__ - push edx ; tail - push ebx ; n_mru or n_mru1 -;__LXUNF025__ - mov esi, esp + ctojr32 + ckt32 dl +;__LEXEC017__ + popa + ret -%define n_mru [esi] -%define n_mru1 [esi] -%define tail [esi + 4*1] -%define cto8_0f [esi + 4*2] -%define cto8 [esi + 4*2 +1] -%define addvalue [esi + 4*3 + 7*4] - -%ifdef __MRUBITS1__ - inc ebx ; n_mru1 ==> n_mru -%endif ;__LXMRU030__ -unf1: ; allocate and clear mru[] - push edx ; zero -%ifdef __MRUBYTE1__ - dec bl -%else ;__MRUARB10__ - dec ebx -%endif ;__LXMRU040__ - jnz unf1 ; leaves 0=='hand' - -%define tmp ebp - -%define jc eax -%define hand ebx -%define hand_l al -%define kh edx -%define kh_l dl - -;__LXUNF030__ -calltrickloop: - mov al, [edi] - inc edi -%ifndef __LXNJMP00__ - sub al, 0x80 ; base of Jcc - cmp al, 0x8f - 0x80 ; span of Jcc - ja ct2 ; not Jcc - mov edx, [edi] ; often unaligned - cmp dx, cto8_0f - jne unfcount - mov byte [edi -1], dl ; 0x0f prefix - add al, 0x80 ; reconstitute Jcc - dec ecx - mov byte [edi], al ; Jcc opcode - inc edi - jmps lxunf -ct2: - sub al, 0xE8 - 0x80 ; base of JMP/CALL - cmp al, 0xE9 - 0xE8 ; span of JMP/CALL -%else ;__LXNJMP10__ - sub al, 0xE8 ; base of JMP/CALL - cmp al, 0xE9 - 0xE8 ; span of JMP/CALL -%endif ;__LXMRU050__ - ja unfcount - mov al, [edi] - cmp al, cto8 - je lxunf -unfcount: - dec ecx - jg calltrickloop - - mov edi,esp ; clear mru[] portion of stack -%ifdef __MRUBYTE2__ - mov ecx, 3+ 256 ; unused, tail, ct8_0f -%elifdef __MRUBITS2__ - mov ecx, n_mru1 - add ecx, byte 1+ 3 ; n_mru1, tail, ct8_0f -%else ;__MRUARB20__ - mov ecx, n_mru - add ecx, byte 3 ; n_mru, tail, ct8_0f -%endif ;__LXMRU060__ - xor eax,eax - rep - stosd - mov esp,edi - popa - push ecx - push eax - push edx - ret - -lxunf: - mov eax, [edi] - shr ax, 8 - rol eax, 16 - xchg al, ah -;__LXMRU065__ - shr jc, 1 ; eax= jc, or mru index - jnc mru4 ; not 1st time for this jc -%ifdef __MRUBYTE3__ - dec hand_l -%else ;__MRUARB30__ - dec hand -%ifdef __MRUBITS3__ - and hand, n_mru1 -%else ;__MRUARB40__ - jge mru3 - add hand, n_mru -mru3: -%endif -%endif ;__LXMRU070__ - - mov [esp + 4*hand], jc ; 1st time: mru[hand] = jc - jmps unf_store - -mru4: ; not 1st time for this jc - lea kh, [jc + hand] ; kh = jc + hand -%ifdef __MRUBYTE4__ - movzbl kh, kh_l -%elifdef __MRUBITS4__ - and kh, n_mru1 -%else ;__MRUARB50__ - cmp kh, n_mru - jb mru5 - sub kh, n_mru -mru5: -%endif ;__LXMRU080__ - mov jc, [esp + 4*kh] ; jc = mru[kh] -%ifdef __MRUBYTE5__ - dec hand_l -%else ;__MRUARB60__ - dec hand -%ifdef __MRUBITS5__ - and hand, n_mru1 -%else ;__MRUARB70__ - jge mru6 - add hand, n_mru -mru6: -%endif -%endif ;__LXMRU090__ - - mov tmp, [esp + 4*hand] ; tmp = mru[hand] - test tmp,tmp - jnz mru8 - - push jc ; ran out of registers - mov eax, tail - -%ifdef __MRUBYTE6__ - dec al -%else ;__MRUARB80__ - dec eax -%ifdef __MRUBITS6__ - and eax, n_mru1 -%else ;__MRUARB90__ - jge mru7 - add eax, n_mru -mru7: -%endif -%endif ;__LXMRU100__ - - xor tmp,tmp - mov tail, eax - xchg [4+ esp + 4*eax], tmp ; tmp = mru[tail]; mru[tail] = 0 - pop jc -mru8: - mov [esp + 4*kh ], tmp ; mru[kh] = tmp - mov [esp + 4*hand], jc ; mru[hand] = jc -;__LXUNF040__ -unf_store: - sub eax, edi - sub ecx, byte 4 - add eax, addvalue - mov [edi], eax - add edi, byte 4 - jmps unfcount - -;__LXELF020__ +;__LEXEC020__ %define PAGE_MASK (~0<<12) %define PAGE_SIZE ( 1<<12) -%define szElf32_Ehdr 0x34 -%define szElf32_Phdr 8*4 -%define p_filesz 4*4 -%define p_memsz 5*4 -%define a_val 4 - %define MAP_FIXED 0x10 %define MAP_PRIVATE 0x02 %define MAP_ANONYMOUS 0x20 @@ -366,7 +142,7 @@ unfold: push eax ; &destination ; mmap a page to hold the decompressed program - xor ecx,ecx + xor ecx, ecx push ecx push ecx mov ch, PAGE_SIZE >> 8 @@ -390,94 +166,12 @@ unfold: main: pop ebp ; &decompress call unfold -fold_begin: - ; patchLoader will modify to be - ; dword sz_uncompressed, sz_compressed - ; byte compressed_data... - pop eax ; discard &sz_uncompressed - pop eax ; discard sz_uncompressed - -; Move argc,argv,envp down so that we can insert more Elf_auxv entries. -; ld-linux.so.2 depends on AT_PHDR and AT_ENTRY, for instance - -%define OVERHEAD 2048 -%define MAX_ELF_HDR 512 - - mov esi, esp - sub esp, byte 6*8 ; AT_PHENT, AT_PHNUM, AT_PAGESZ, AT_ENTRY, AT_PHDR, AT_NULL - mov edi, esp - call do_auxv - - sub esp, dword MAX_ELF_HDR + OVERHEAD - push esp ; argument: temp space - push edi ; argument: AT_next - push ebp ; argument: &decompress - push edx ; argument: my_elfhdr - add edx, [p_memsz + szElf32_Ehdr + edx] - push edx ; argument: uncbuf -EXTERN upx_main - call upx_main ; entry = upx_main(uncbuf, my_elfhdr, &decompress, AT_next, tmp_ehdr) - pop esi ; decompression buffer == (p_vaddr + p_memsz) of stub - pop ebx ; my_elfhdr - add esp, dword 3*4 + MAX_ELF_HDR + OVERHEAD ; remove 3 params, temp space - push eax ; save entry address - - mov edi, [a_val + edi] ; AT_PHDR -find_hatch: - push edi -EXTERN make_hatch - call make_hatch ; find hatch = make_hatch(phdr) - pop ecx ; junk the parameter - add edi, byte szElf32_Phdr ; prepare to try next Elf32_Phdr - test eax,eax - jz find_hatch - xchg eax,edx ; edx= &hatch - -; _dl_start and company (ld-linux.so.2) assumes that it has virgin stack, -; and does not initialize all its stack local variables to zero. -; Ulrich Drepper (drepper@cyngus.com) has refused to fix the bugs. -; See GNU wwwgnats libc/1165 . - -%define N_STKCLR (0x100 + MAX_ELF_HDR + OVERHEAD)/4 - lea edi, [esp - 4*N_STKCLR] - pusha ; values will be zeroed - mov ecx, N_STKCLR - xor eax,eax - rep stosd - - mov ecx,esi ; my p_vaddr + p_memsz - mov bh,0 ; round down to 64KB boundary - sub ecx,ebx ; length to unmap - push byte __NR_munmap - pop eax - jmp edx ; unmap ourselves via escape hatch, then goto entry - -do_auxv: ; entry: %esi=src = &argc; %edi=dst. exit: %edi= &AT_NULL - ; cld - -L10: ; move argc+argv - lodsd - stosd - test eax,eax - jne L10 - -L20: ; move envp - lodsd - stosd - test eax,eax - jne L20 - -L30: ; move existing Elf32_auxv - lodsd - stosd - test eax,eax ; AT_NULL ? - lodsd - stosd - jne L30 - - sub edi, byte 8 ; point to AT_NULL - ret +eof: +; __XTHEENDX__ + section .data + dd -1 + dw eof ; vi:ts=8:et:nowrap diff --git a/src/stub/l_lx_elf86.lds b/src/stub/l_lx_elf86.lds index 9e066473..984355c9 100644 --- a/src/stub/l_lx_elf86.lds +++ b/src/stub/l_lx_elf86.lds @@ -27,7 +27,7 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) -ENTRY(_start) +/*ENTRY(_start)*/ SECTIONS { /* 0x00401000: l_lx_elf86.asm assumes 1 page up from 64KB boundary */ diff --git a/src/stub/l_lx_exec.c b/src/stub/l_lx_exec.c index 241201ce..93fd4edc 100644 --- a/src/stub/l_lx_exec.c +++ b/src/stub/l_lx_exec.c @@ -101,12 +101,19 @@ static uint32_t ascii5(char *p, uint32_t v, unsigned n) do { unsigned char d = v % 32; if (d >= 26) d -= 43; // 43 == 'Z' - '0' + 1 - *--p += d; + *--p = (d += 'A'); v /= 32; } while (--n > 0); return v; } +static char * +do_mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) +{ + (void)len; (void)prot; (void)flags; (void)fd; (void)offset; + return mmap((int *)&addr); +} + #if defined(__i386__) # define SET2(p, c0, c1) \ @@ -125,6 +132,51 @@ static uint32_t ascii5(char *p, uint32_t v, unsigned n) #endif +// go_self is a separate subroutine to spread the burden of local arrays. +// Otherwise the size of the stack frame in upx_main exceeds 128 bytes, +// which causes too many offsets to expand from 1 byte to 4. + +static int +go_self(char const *tmpname, char *argv[], char *envp[]) +{ + // FIXME: why not use "/proc/self/fd/XX"? *BSD doesn't have it? + + // Open the temp file. + int const fdi = open(tmpname, O_RDONLY, 0); + + if (0 <= fdi) { + // 17 chars for "/proc/PPPPP/fd/XX" should be enough, but we + // play safe in case there will be 32-bit pid_t at some time. + //char procself_buf[17+1]; + char procself_buf[31+1]; + + // Compute name of temp fdi. + SET4(procself_buf + 0, '/', 'p', 'r', 'o'); + SET4(procself_buf + 4, 'c', '/', 0 , 0 ); + { + char *const procself = upx_itoa(procself_buf + 6, getpid()); + SET4(procself, '/', 'f', 'd', '/'); + upx_itoa(procself + 4, fdi); + } + + // Check for working /proc/self/fd/X by accessing the + // temp file again, now via temp fdi. + if (UPX2 == access(procself_buf, R_OK | X_OK)) { + // Now it's safe to unlink the temp file (as it is still open). + unlink(tmpname); + // Set the file close-on-exec. + fcntl(fdi, F_SETFD, FD_CLOEXEC); + // Execute the original program via /proc/self/fd/X. + execve(procself_buf, argv, envp); + // NOTE: if we get here we've lost. + } + + // The proc filesystem isn't working. No problem. + close(fdi); + } + return fdi; +} + /************************************************************************* // UPX & NRV stuff **************************************************************************/ @@ -153,59 +205,29 @@ void upx_main( f_expand *const f_decompress ) { - // file descriptors - int fdi, fdo; - Elf32_Phdr const *const phdr = (Elf32_Phdr const *) - (my_ehdr->e_phoff + (char const *)my_ehdr); - struct Extent xi = { phdr[1].p_memsz, (char *)phdr[1].p_vaddr }; - char *next_unmap = (char *)(PAGE_MASK & (unsigned)xi.buf); - struct p_info header; - - // for getpid() - pid_t pid; - - // temporary file name (max 14 chars) - static char tmpname_buf[] = "/tmp/upxAAAAAAAAAAA"; - char *tmpname = tmpname_buf; - // 17 chars for "/proc/PPPPP/fd/XX" should be enough, but we - // play safe in case there will be 32-bit pid_t at some time. - //char procself_buf[17+1]; - char procself_buf[31+1]; - char *procself; + // file descriptor + int fdo; // decompression buffer unsigned char *buf; - static struct MallocArgs { - char *ma_addr; - size_t ma_length; - int ma_prot; - int ma_flags; - int ma_fd; - off_t ma_offset; - } malloc_args = { -#if defined(USE_MMAP_FO) - 0, 0, PROT_READ | PROT_WRITE, MAP_SHARED, 0, 0 -#else - 0, 0, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 -#endif - }; -#if defined(USE_MMAP_FO) - static struct MallocArgs scratch_page = { - 0, -PAGE_MASK, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0 - }; -#endif + + char *tmpname; + + Elf32_Phdr const *const phdr = (Elf32_Phdr const *) + (my_ehdr->e_phoff + (char const *)my_ehdr); + struct Extent xi = { phdr[1].p_memsz, (char *)phdr[1].p_vaddr }; + + char *next_unmap = (char *)(PAGE_MASK & (unsigned)xi.buf); + struct p_info header; + + // temporary file name + char tmpname_buf[20]; // // ----- Step 0: set /proc/self using /proc/ ----- // //personality(PER_LINUX); - pid = getpid(); - SET4(procself_buf + 0, '/', 'p', 'r', 'o'); - SET2(procself_buf + 4, 'c', '/'); - procself = upx_itoa(procself_buf + 6, pid); - *procself++ = '/'; // @@ -226,7 +248,7 @@ void upx_main( // Paranoia. Make sure this is actually our expected executable // by checking the random program id. (The id is both stored // in the header and patched into this stub.) - if (header.p_progid != UPX2) + if (header.p_progid != UPX3) goto error1; @@ -234,13 +256,17 @@ void upx_main( // ----- Step 2: prepare temporary output file ----- // + tmpname = tmpname_buf; + SET4(tmpname + 0, '/', 't', 'm', 'p'); + SET4(tmpname + 4, '/', 'u', 'p', 'x'); + // Compute name of temporary output file in tmpname[]. // Protect against Denial-of-Service attacks. { char *p = tmpname_buf + sizeof(tmpname_buf) - 1; // Compute the last 4 characters (20 bits) from getpid(). - uint32_t r = ascii5(p, (uint32_t)pid, 4); p-=4; + uint32_t r = ascii5(p, (uint32_t)getpid(), 4); *p = '\0'; p -= 4; // Provide 4 random bytes from our program id. r ^= header.p_progid; @@ -254,7 +280,7 @@ void upx_main( r ^= ((uint32_t) tv.tv_usec) << 12; // shift into high-bits #else // using adjtimex() may cause portability problems - static struct timex tx; + struct timex tx; adjtimex(&tx); r ^= (uint32_t) tx.time.tv_sec; r ^= ((uint32_t) tx.time.tv_usec) << 12; // shift into high-bits @@ -294,23 +320,23 @@ void upx_main( // #if defined(USE_MMAP_FO) - // mmap()ed output file. - malloc_args.ma_fd = fdo; - // FIXME: packer could set ma_length - malloc_args.ma_length = header.p_filesize; - buf = mmap((int *)&malloc_args); + // FIXME: packer could set length + buf = do_mmap(0, header.p_filesize, + PROT_READ | PROT_WRITE, MAP_SHARED, fdo, 0); if ((unsigned long) buf >= (unsigned long) -4095) goto error; // Decompressor can overrun the output by 3 bytes. // Defend against SIGSEGV by using a scratch page. - scratch_page.ma_addr = buf + (PAGE_MASK & (header.p_filesize + ~PAGE_MASK)); - mmap((int *)&scratch_page); + // FIXME: packer could set address delta + do_mmap(buf + (PAGE_MASK & (header.p_filesize + ~PAGE_MASK)), + -PAGE_MASK, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0 ); #else // Temporary decompression buffer. - // FIXME: packer could set ma_length - malloc_args.ma_length = (header.p_blocksize + OVERHEAD + ~PAGE_MASK) & PAGE_MASK; - buf = mmap((int *)&malloc_args); + // FIXME: packer could set length + buf = do_mmap(0, (header.p_blocksize + OVERHEAD + ~PAGE_MASK) & PAGE_MASK, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0 ); if ((unsigned long) buf >= (unsigned long) -4095) goto error; #endif @@ -429,34 +455,9 @@ void upx_main( // Many thanks to Andi Kleen and // Jamie Lokier for this nice idea. - // Open the temp file. - fdi = open(tmpname, O_RDONLY, 0); - if (fdi < 0) + if (0 > go_self(tmpname, argv, envp)) goto error; - // Compute name of temp fdi. - SET3(procself, 'f', 'd', '/'); - upx_itoa(procself + 3, fdi); - - // Check for working /proc/self/fd/X by accessing the - // temp file again, now via temp fdi. -#define err fdo - err = access(procself_buf, R_OK | X_OK); - if (err == UPX3) - { - // Now it's safe to unlink the temp file (as it is still open). - unlink(tmpname); - // Set the file close-on-exec. - fcntl(fdi, F_SETFD, FD_CLOEXEC); - // Execute the original program via /proc/self/fd/X. - execve(procself_buf, argv, envp); - // NOTE: if we get here we've lost. - } -#undef err - - // The proc filesystem isn't working. No problem. - close(fdi); - // // ----- Step 7: start program in /tmp ----- @@ -474,7 +475,7 @@ void upx_main( if (fork() == 0) { // Sleep 3 seconds, then remove the temp file. - static const struct timespec ts = { UPX4, 0 }; + struct timespec ts; ts.tv_sec = UPX4; ts.tv_nsec = 0; nanosleep(&ts, 0); unlink(tmpname); } @@ -500,4 +501,3 @@ void upx_main( /* vi:ts=4:et:nowrap */ - diff --git a/src/stub/l_lx_exec86.asm b/src/stub/l_lx_exec86.asm index dc5df28b..6ad12f2f 100644 --- a/src/stub/l_lx_exec86.asm +++ b/src/stub/l_lx_exec86.asm @@ -33,34 +33,13 @@ %define jmps jmp short %define jmpn jmp near -; defines for ident.ash and n2b_d32.ash -%ifdef SMALL - %define __IDENTSMA__ - %define __N2BSMA10__ - %define __N2BSMA20__ - %define __N2BSMA30__ - %define __N2BSMA40__ - %define __N2BSMA50__ - %define __N2BSMA60__ - %define __N2DSMA10__ - %define __N2DSMA20__ - %define __N2DSMA30__ - %define __N2DSMA40__ - %define __N2DSMA50__ - %define __N2DSMA60__ -%endif - - - -%include "ident.ash" - ; /************************************************************************* ; // program entry point ; // see glibc/sysdeps/i386/elf/start.S ; **************************************************************************/ GLOBAL _start - +;__LEXEC000__ _start: ;;;; int3 ;; How to debug this code: Uncomment the 'int3' breakpoint instruction above. @@ -85,19 +64,24 @@ _start: xor ebx, ebx ; PER_LINUX int 0x80 %endif + call main ; push address of decompress subroutine +decompress: ; /************************************************************************* ; // C callable decompressor ; **************************************************************************/ -%define INP dword [esp+8*4+4] -%define INS dword [esp+8*4+8] -%define OUTP dword [esp+8*4+12] -%define OUTS dword [esp+8*4+16] +%define INP dword [esp+8*4+8] +%define INS dword [esp+8*4+12] +%define OUTP dword [esp+8*4+16] +%define OUTS dword [esp+8*4+20] -decompress: +;__LEXEC009__ + mov eax, 'NMRU' ; free slot in following 'pusha' +;__LEXEC010__ pusha + push byte '?' ; cto8 (sign extension does not matter) ; cld mov esi, INP @@ -105,75 +89,81 @@ decompress: or ebp, byte -1 ;;; align 8 -%ifdef NRV2B - %include "n2b_d32.ash" -%elifdef NRV2D - %include "n2d_d32.ash" -%else - %error -%endif +%include "n2b_d32.ash" +%include "n2d_d32.ash" +%include "macros.ash" + cjt32 0 +;__LEXEC015__ ; eax is 0 from decompressor code ;xor eax, eax ; return code ; check compressed size mov edx, INP add edx, INS - cmp esi, edx + cmp edx, esi jz .ok dec eax .ok: + xchg [8*4 + esp], eax ; store success/failure, fetch NMRU -; write back the uncompressed size - sub edi, OUTP - mov edx, OUTS - mov [edx], edi +; write back the uncompressed size, and prepare for unfilter + mov edx, OUTS + mov ecx, edi + mov edi, OUTP + sub ecx, edi ; ecx= uncompressed size + mov [edx], ecx - mov [7*4 + esp], eax + pop edx ; cto8 + +;__LEXEC110__ Jcc and/or possible n_mru + push edi ; addvalue + push byte 0x0f + pop ebx + mov bh, dl ; ebx= 0,,cto8,0x0F + +;__LEXEC100__ 0!=n_mru + xchg eax, ebx ; eax= ct08_0f; ebx= n_mru {or n_mru1} + +;;LEXEC016 bug in APP: jmp and target must be in same .asx +;; jmpn lxunf0 ; logically belongs here + + ctojr32 + ckt32 dl +;__LEXEC017__ popa ret +;__LEXEC020__ %define PAGE_MASK (~0<<12) %define PAGE_SIZE ( 1<<12) -%define szElf32_Ehdr 0x34 -%define szElf32_Phdr 8*4 -%define p_filesz 4*4 -%define p_memsz 5*4 -%define a_val 4 - -%define MAP_FIXED 0x10 -%define MAP_PRIVATE 0x02 -%define MAP_ANONYMOUS 0x20 -%define PROT_READ 1 -%define PROT_WRITE 2 -%define PROT_EXEC 4 -%define __NR_mmap 90 -%define __NR_munmap 91 - ; Decompress the rest of this loader, and jump to it unfold: - pop esi ; &fold_begin = src + pop esi ; &{ sz_uncompressed, sz_compressed, compressed_data...} + mov ecx, PAGE_MASK push esi ; &dst - mov ecx, ebp ; &decompress - and ecx, dword PAGE_MASK ; &my_elfhdr - mov ebx, ecx ; save &my_elfhdr for later + mov ebx, ebp ; &decompress + and ebx, ecx ; &my_elfhdr + neg ecx ; ecx= PAGE_SIZE + + cld + lodsd ; sz_uncompressed + lodsd ; sz_compressed ;; Compressed code now begins at fold_begin. ;; We want decompressed code to begin at fold_begin, too. ;; Move the compressed code to the high end of the page. ;; Assume non-overlapping so that forward movsb is OK. - lea edi, [-PAGE_MASK + ecx] ; high end of page - add ecx, [p_filesz + szElf32_Ehdr + ecx] ; beyond src - sub ecx, esi ; srclen - push ecx ; srclen - sub edi, ecx - push edi ; &src - cld - rep movsb + lea edi, [ecx + ebx] ; high end of page + push eax ; srclen (of both movsb and decompress) + sub edi, eax ; dst of movsb + push edi ; &src for decompression (after movsb) + xchg ecx, eax ; ecx= len of movsb + rep movsb call ebp ; decompress(&src, srclen, &dst, &dstlen) pop eax ; discard &src pop eax ; discard srclen @@ -188,23 +178,14 @@ unfold: main: pop ebp ; &decompress - push eax ; place to store dstlen - push esp ; &dstlen + push eax ; sz_uncompressed (junk, actually) + push esp ; &sz_uncompressed call unfold -fold_begin: ;; this label is known to the Makefile - pop eax ; discard &dstlen - pop eax ; discard dstlen - pop eax ; Pop the argument count - mov ecx, esp ; argv starts just at the current stack top - lea edx, [ecx+eax*4+4] ; envp = &argv[argc + 1] - push eax ; Restore the stack - push ebp ; argument: &decompress - push ebx ; argument: &my_elfhdr - push edx ; argument: envp - push ecx ; argument: argv -EXTERN upx_main - call upx_main ; Call the UPX main function - hlt ; Crash if somehow upx_main does return +eof: +; __XTHEENDX__ + section .data + dd -1 + dw eof ; vi:ts=8:et:nowrap diff --git a/src/stub/l_lx_exec86.lds b/src/stub/l_lx_exec86.lds index c2cf8b88..4e6c59c8 100644 --- a/src/stub/l_lx_exec86.lds +++ b/src/stub/l_lx_exec86.lds @@ -27,7 +27,7 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) -ENTRY(_start) +/*ENTRY(_start)*/ SECTIONS { /* 0x08048000: customary Linux/x86 Elf .text start */ diff --git a/src/stub/l_lx_sh86.asm b/src/stub/l_lx_sh86.asm index afa9fd0c..5a24f28e 100644 --- a/src/stub/l_lx_sh86.asm +++ b/src/stub/l_lx_sh86.asm @@ -30,37 +30,13 @@ BITS 32 SECTION .text -%define jmps jmp short -%define jmpn jmp near - -; defines for ident.ash and n2b_d32.ash -%ifdef SMALL - %define __IDENTSMA__ - %define __N2BSMA10__ - %define __N2BSMA20__ - %define __N2BSMA30__ - %define __N2BSMA40__ - %define __N2BSMA50__ - %define __N2BSMA60__ - %define __N2DSMA10__ - %define __N2DSMA20__ - %define __N2DSMA30__ - %define __N2DSMA40__ - %define __N2DSMA50__ - %define __N2DSMA60__ -%endif - - - -%include "ident.ash" - ; /************************************************************************* ; // program entry point ; // see glibc/sysdeps/i386/elf/start.S ; **************************************************************************/ GLOBAL _start - +;__LEXEC000__ _start: ;;;; int3 ;; How to debug this code: Uncomment the 'int3' breakpoint instruction above. @@ -79,7 +55,9 @@ _start: ;; end ;; Step through the code; remember that repeats the previous command. ;; + call main ; push address of decompress subroutine +decompress: ; /************************************************************************* ; // C callable decompressor @@ -90,7 +68,7 @@ _start: %define OUTP dword [esp+8*4+12] %define OUTS dword [esp+8*4+16] -decompress: +;__LEXEC010__ pusha ; cld @@ -99,15 +77,13 @@ decompress: or ebp, byte -1 ;;; align 8 -%ifdef NRV2B - %include "n2b_d32.ash" -%elifdef NRV2D - %include "n2d_d32.ash" -%else - %error -%endif +%include "n2b_d32.ash" +%include "n2d_d32.ash" +%include "macros.ash" + cjt32 0 +;__LEXEC015__ ; eax is 0 from decompressor code ;xor eax, eax ; return code @@ -128,15 +104,13 @@ decompress: popa ret +;__LEXEC020__ %define PAGE_MASK (~0<<12) %define PAGE_SIZE ( 1<<12) %define szElf32_Ehdr 0x34 -%define szElf32_Phdr 8*4 -%define p_filesz 4*4 %define p_memsz 5*4 -%define a_val 4 %define MAP_FIXED 0x10 %define MAP_PRIVATE 0x02 @@ -145,7 +119,6 @@ decompress: %define PROT_WRITE 2 %define PROT_EXEC 4 %define __NR_mmap 90 -%define __NR_munmap 91 ; Decompress the rest of this loader, and jump to it unfold: @@ -186,104 +159,12 @@ unfold: main: pop ebp ; &decompress call unfold -fold_begin: - ; patchLoader will modify to be - ; dword sz_uncompressed, sz_compressed - ; byte compressed_data... - - pop eax ; discard &sz_uncompressed - pop eax ; discard sz_uncompressed - -; Move argc,argv,envp down so that we can insert more Elf_auxv entries. -; ld-linux.so.2 depends on AT_PHDR and AT_ENTRY, for instance - -%define OVERHEAD 2048 -%define MAX_ELF_HDR 512 - - mov esi, esp - sub esp, byte 6*8 ; AT_PHENT, AT_PHNUM, AT_PAGESZ, AT_ENTRY, AT_PHDR, AT_NULL - mov edi, esp - call do_auxv - - sub esp, dword MAX_ELF_HDR + OVERHEAD - push esp ; argument: temp space - push edi ; argument: AT_next - push ebp ; argument: &decompress - push edx ; argument: my_elfhdr - add ecx, PAGE_SIZE ; uncompressed stub fits in this - push ecx ; argument: uncbuf -EXTERN upx_main - call upx_main ; entry = upx_main(uncbuf, my_elfhdr, &decompress, AT_next, tmp_ehdr) - pop esi ; decompression buffer - pop ebx ; my_elfhdr - add esp, dword 3*4 + MAX_ELF_HDR + OVERHEAD ; remove 3 params, temp space - - pop ecx ; argc - pop edx ; $0 filename, to become argv[0] - push edx ; restore $0 filename - - add esi, byte 3 - inc ecx - push esi ; &uncompressed shell script - sub esi, byte 3 - - mov [esi], word 0x632d ; "-c" - inc ecx - push esi ; "-c" - - inc ecx - push edx ; argv[0] is duplicate of $0 - - push ecx ; new argc - push eax ; save entry address - -; _dl_start and company (ld-linux.so.2) assumes that it has virgin stack, -; and does not initialize all its stack local variables to zero. -; Ulrich Drepper (drepper@cyngus.com) has refused to fix the bugs. -; See GNU wwwgnats libc/1165 . - -%define N_STKCLR (0x100 + MAX_ELF_HDR + OVERHEAD)/4 - lea edi, [esp - 4*N_STKCLR] - pusha ; values will be zeroed - mov ecx, N_STKCLR - xor eax,eax - rep stosd - -; Because the decompressed shell script occupies low memory anyway, -; there isn't much payback to unmapping the compressed script and -; ourselves the stub. We would need a place to put the escape hatch -; "int $0x80; popa; ret", and some kernels do not allow execution -; on the stack. So, we would have to dirty a page of the shell -; or of /lib/ld-linux.so. It's simpler just to omit the unapping. - popa - ret - -do_auxv: ; entry: %esi=src = &argc; %edi=dst. exit: %edi= &AT_NULL - ; cld - -L10: ; move argc+argv - lodsd - stosd - test eax,eax - jne L10 - -L20: ; move envp - lodsd - stosd - test eax,eax - jne L20 - -L30: ; move existing Elf32_auxv - lodsd - stosd - test eax,eax ; AT_NULL ? - lodsd - stosd - jne L30 - - sub edi, byte 8 ; point to AT_NULL - ret +eof: +; __XTHEENDX__ + section .data + dd -1 + dw eof ; vi:ts=8:et:nowrap diff --git a/src/stub/l_lx_sh86.lds b/src/stub/l_lx_sh86.lds index 08d1224c..d7d0dc93 100644 --- a/src/stub/l_lx_sh86.lds +++ b/src/stub/l_lx_sh86.lds @@ -27,18 +27,29 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) -ENTRY(_start) +/*ENTRY(_start)*/ SECTIONS { /* 0x00800000: avoid 0x00400000 for shell itself being compressed */ . = 0x00800000 + SIZEOF_HEADERS; . = ALIGN(0x80); +/* .text : { *(.text) *(.data) + } +*/ + .data : { /* put everything together in one Phdr */ + *(.text) + *(.rodata) + *(.data) + *(.bss) + *(COMMON) } /* 0x08048000: customary Linux/x86 Elf .text start */ +/* . = 0x08048000 + (0xfff & .); .data : { } +*/ } diff --git a/src/stub/macros.ash b/src/stub/macros.ash index a27f09ea..3f0b98ea 100644 --- a/src/stub/macros.ash +++ b/src/stub/macros.ash @@ -168,6 +168,53 @@ ctend: %endmacro +;; call/jump/jcc trick; also used more than once (and/or optionally), so +;; ecx has byte count (not count of applied instances), and +;; edi points to buffer. +%macro ckt32 1 ; param: where is cto8 (dl, bl, or literal) +;__CKLLTR00__ + mov esi, edi ; base of block + jmps ckstart +ckloop3: + mov al, [edi] + inc edi +;__CKLLTR10__ Jcc only + cmp al, 0x80 ; lo of 6-byte Jcc + jb ckloop2 + cmp al, 0x8f ; hi of 6-byte Jcc + ja ckloop2 + cmp byte [edi -2], 0x0F ; prefix of 6-byte Jcc + je ckmark +ckloop2: +;__CKLLTR20__ + sub al, 0xE8 + cmp al, 0xE9 - 0xE8 + ja ckcount +ckmark: + cmp byte [edi], %1 ; cto8 + jnz ckcount + mov eax, [edi] + + shr ax, 8 + rol eax, 16 + xchg al, ah + + sub eax, edi + add eax, esi + mov [edi], eax + add edi, byte 4 +ckstart: + sub ecx, byte 4 +;__CKLLTR30__ Jcc only + mov al, [edi] + inc edi + loop ckloop2 ; prefix cannot overlap previous displacement +;__CKLLTR40__ +ckcount: + dec ecx + jg ckloop3 +ckend: +%endmacro ;; ============= ;; ============= 32-BIT RELOCATIONS @@ -213,4 +260,312 @@ reloc_endx: %endmacro +;; ============= +;; ============= 32-BIT CALL TRICK UNFILTER WITH MostRecentlyUsed BUFFER +;; ============= + +;;;; names of pseudo-sections for addLoader: +;; LXUNFnnn Linux unfilter +;; LXMRUnnn MostRecentlyUsed recoding of destinations +;; MRUARBnn arbitrary number of entries in wheel +;; MRUBITSn power of 2 entries in wheel (smaller code) +;; MRUBYTEn 256 entries in wheel (smallest code) + +%macro ctojr32 0 +;; I got confused by the syntactic sugar of the fake %ifdefs. +;; I can read the section name more easily when it is at the left margin. +;; Also, some of the logic to select the sections is not that simple, +;; and any mismatch between the logic and the %ifdefs is very confusing. +;; Instead, I use comments after the section name, and blank lines for grouping. + +;__LXUNF000__ enter at +0 for decompression; +2 for unfiltering + jmps decompr0 +;__LXUNF002__ + ;; 2+ address of decompress subroutine + ;; unfilter(upx_byte *, length, cto8) +lxunfilter: + pop edx ; return address + pop eax ; upx_byte *, same as addvalue + pop ecx ; length + xchg eax, edi ; edi= pointer; eax= saved_edi + pusha ; save C-convention ebx, ebp, esi, edi; also eax, edx + +; at most one of the next 2 +;__MRUBYTE0__ 256==n_mru + xor ebx, ebx ; zero +;__LXMRU005__ 0!=n_mru + mov ebx, 'NMRU' ; modified N_MRU or N_MRU -1 + +;__LXMRU006__ 0!=n_mru + push byte 0x0f ; prefix of 6-byte Jcc + pop eax + mov ah, [esp + 8*4] ; cto8 +;__LXMRU007__ 0==n_mru + push byte 0x0f ; prefix of 6-byte Jcc + pop ebx + mov bh, [esp + 8*4] ; cto8 + +;__LXUNF008__ + mov dl, [esp + 8*4] ; cto8 + +;__LXUNF010__ + jmpn lxunf0 +decompr0: + +;; These %define are only if 0!=n_mru; +;; else 0x0F==bl && cto8==bh==dh && 0xE8==dl && addvalue==esi . +%define n_mru [esi] +%define n_mru1 [esi] +%define tail [esi + 4*1] +%define cto8_e8e9 [esi + 4*2] +%define cto8_0f [esi + 4*3] +%define addvalue [esi + 4*4] +%define tmp ebp +%define hand ebx +%define hand_l bl +%define kh edx +%define kh_l dl + +;__LXJCC010__ +lxunf2: ; have seen 0x80..0x8f of possible recoded 6-byte Jcc + movzx ebp, word [edi] ; 2 bytes, zero-extended + +;__LXMRU045__ 0!=n_mru + sub ebp, cto8_0f +;__LXMRU046__ 0==n_mru + sub ebp, ebx + +;__LXJCC020__ 0==n_mru, or Jcc excluded ('sub' of equals clears Carry) + jne unfcount +;__LXJCC021__ 0!=n_mru and Jcc participates; must set Carry + sub ebp, byte 1 ; set Carry iff in range + jnb unfcount + +;__LXJCC023__ found Jcc; re-swap 0x8Y opcode and 0x0f prefix + mov byte [edi -1], bl ; 0x0f prefix + dec ecx ; preserve Carry + mov byte [edi], al ; Jcc opcode + inc edi ; preserve Carry + +;__LXUNF037__ +%define jc eax + +lxunf: ; in: Carry set iff we should apply mru and 0!=n_mru + mov eax, [edi] ; BE32 displacement with cto8 in low 8 bits + +;__LXUNF386__ 0!=n_mru && 386 + pushf +;__LXUNF387__ ==386 + shr ax, 8 + rol eax, 16 + xchg al, ah +;__LXUNF388__ 0!=n_mru && 386 + popf + jnc unf_store ; do not apply mru + +;__LXUNF486__ >=486 + mov al, byte 0 + bswap eax ; preserve Carry (2-byte instruction) +;__LXUNF487__ 0!=n_mru && >=486 + jnc unf_store ; do not apply mru + +;__LXMRU065__ 0!=n_mru + shr jc, 1 ; eax= jc, or mru index + jnc mru4 ; not 1st time for this jc +;__MRUBYTE3__ + dec hand_l +;__MRUARB30__ + dec hand +;__MRUBITS3__ + and hand, n_mru1 +;__MRUARB40__ + jge mru3 + add hand, n_mru +mru3: +;__LXMRU070__ + + mov [esp + 4*hand], jc ; 1st time: mru[hand] = jc + jmps unf_store + +mru4: ; not 1st time for this jc + lea kh, [jc + hand] ; kh = jc + hand +;__MRUBYTE4__ + movzx kh, kh_l +;__MRUBITS4__ + and kh, n_mru1 +;__MRUARB50__ + cmp kh, n_mru + jb mru5 + sub kh, n_mru +mru5: +;__LXMRU080__ + mov jc, [esp + 4*kh] ; jc = mru[kh] +;__MRUBYTE5__ + dec hand_l +;__MRUARB60__ + dec hand +;__MRUBITS5__ + and hand, n_mru1 +;__MRUARB70__ + jge mru6 + add hand, n_mru +mru6: +;__LXMRU090__ + + mov tmp, [esp + 4*hand] ; tmp = mru[hand] + test tmp,tmp + jnz mru8 + + push jc ; ran out of registers + mov eax, tail + +;__MRUBYTE6__ + dec al +;__MRUARB80__ + dec eax +;__MRUBITS6__ + and eax, n_mru1 +;__MRUARB90__ + jge mru7 + add eax, n_mru +mru7: +;__LXMRU100__ + + xor tmp,tmp + mov tail, eax + xchg [4+ esp + 4*eax], tmp ; tmp = mru[tail]; mru[tail] = 0 + pop jc +mru8: + mov [esp + 4*kh ], tmp ; mru[kh] = tmp + mov [esp + 4*hand], jc ; mru[hand] = jc +;__LXUNF040__ +unf_store: + sub eax, edi + sub ecx, byte 4 + +; one of the next2 +;__LXMRU110__ 0!=n_mru + add eax, addvalue +;__LXMRU111__ 0==n_mru + add eax, esi ; addvalue (same as initial pointer) + +;__LXUNF041__ + mov [edi], eax + add edi, byte 4 + jmps unfcount +;__LXUNF042__ +lxunf0: ;; continuation of entry prolog for unfilter +;__LEXEC016__ bug in APP: jmp and label must be in same .asx/.asy + jmp lxunf0 ; this instr does not really go here! + +;__LXMRU010__ 0!=n_mru + push eax ; cto8_0f +;__LXJMPA00__ only JMP, and not CALL, is filtered + mov al, 0xE9 +;__LXCALLB0__ only CALL, or both CALL and JMP are filtered + mov al, 0xE8 +;__LXUNF021__ common tail + push eax ; cto8_e8e9 + push byte 0 ; tail + push ebx ; n_mru or n_mru1 + mov esi, esp ; flat model "[esi]" saves a byte over "[ebp]" + +;__LXMRU022__ 0==n_mru + pop esi ; addvalue + mov edx, ebx ; dh= cto8 +;__LXJMPA01__ only JMP, and not CALL, is filtered + mov dl, 0xE9 +;__LXCALLB1__ only CALL, or both CALL and JMP are filtered + mov dl, 0xE8 + + +;__MRUBITS1__ + inc hand ; n_mru1 ==> n_mru +;__LXMRU030__ +lxunf1: ; allocate and clear mru[] + push byte 0 + +; one of the next 2, if n_mru +;__MRUBYTE1__ + dec hand_l +;__MRUARB10__ + dec hand + +;__LXMRU040__ 0!=n_mru + jnz lxunf1 ; leaves 0=='hand' + +;__LXUNF030__ +lxctloop: + movzx eax, word [edi] ; 2 bytes, zero extended + inc edi +;__LXJCC000__ + cmp al, 0x80 ; lo of Jcc + jb lxct1 + cmp al, 0x8f ; hi of Jcc + jbe lxunf2 +lxct1: + +;__LXCJ0MRU__ 0==n_mru + sub eax, edx +;__LXCJ1MRU__ 0!=n_mru + sub eax, cto8_e8e9 + +; both CALL and JMP are filtered +;__LXCALJMP__ + sub eax, byte 1+ (0xE9 - 0xE8) ; set Carry iff in range (result: -2, -1) + +; only CALL, or only JMP, is filtered +;__LXCALL00__ 0==n_mru + je lxunf +;__LXCALL01__ 0!=n_rmu + sub eax, byte 1 ; set Carry iff in range + +;__LXCJ2MRU__ 0==n_mru, or apply mru to all that are filtered here + jb lxunf ; only Carry (Borrow) matters +;__LXCJ4MRU__ 0!=n_mru, but apply mru only to subset of filtered here + jnb unfcount ; was not filtered anyway: do not unfilter + +;we will unfilter, and 0!=n_mru, but should we apply mru? +;__LXCJ6MRU__ apply mru to JMP only (0xFF==al) + jpe lxct3 ; jump if even number of 1 bits in al +;__LXCJ7MRU__ apply mru to CALL only (0xFE==al) + jpo lxct3 ; jump if odd number of 1 bits in al +;__LXCJ8MRU__ do not apply mru to one or both + clc +lxct3: + jmps lxunf + +;__LXUNF034__ +unfcount: + dec ecx + jg lxctloop + +;__LXMRU055__ + mov edi, esp ; clear mru[] portion of stack +;__MRUBYTE2__ + mov ecx, 4+ 256 ; unused, tail, cto8_e8e9, cto8_0f +;__MRUBITS2__ + mov ecx, n_mru1 + add ecx, byte 1+ 4 ; n_mru1, tail, cto8_e8e9, cto8_0f +;__MRUARB20__ + mov ecx, n_mru + add ecx, byte 4 ; n_mru, tail, cto8_e8e9, cto8_0f +;__LXMRU057__ + xor eax, eax + rep + stosd + mov esp, edi + +;__LXMRU058__ 0==n_mru + push esi +;__LXUNF035__ + popa + xchg eax, edi + push ecx + push eax + push edx + ret + +%endmacro + ; vi:ts=8:et:nowrap