Reorganize dos/exe. No stub changes.

2007-03-04 15:14:44 +01:00 · 2007-03-04 15:14:44 +01:00 · 66418ffc8c
commit 66418ffc8c
parent b4c4b1fb70
3 changed files with 354 additions and 250 deletions
--- a/src/stub/src/arch/i086/lzma_d.S
+++ b/src/stub/src/arch/i086/lzma_d.S
@ -39,192 +39,14 @@


 /*************************************************************************
-// support macros
-**************************************************************************/
-
-// huge pointer diff: dx:ax = dx:ax - cx:bx
-// !!! this version does nothing !!!
-.macro M_aNahdiff
-        add     sp, 8
-.endm
-
-
-// umul32: dx:ax = dx:ax * 00:bx
-.macro  M_LMUL_dxax_00bx
-        // mult high-word
-        mov     cx, ax      // save ax
-        mov     ax, dx
-        mul     bx
-        xchg    ax, cx      // save high-word result, get saved ax
-        // mult low-word
-        mul     bx          // dx:ax := ax * bx
-        // add high-word
-        add     dx, cx      // add high-word result
-.endm
-
-// umul32: dx:ax = dx:ax * word ptr [bx]
-.macro  M_LMUL_dxax_00bx_ptr
-        // mult high-word
-        mov     cx, ax      // save ax
-        mov     ax, dx
-        mul     word ptr [bx]
-        xchg    ax, cx      // save high-word result, get saved ax
-        // mult low-word
-        mul     word ptr [bx]
-        // add high-word
-        add     dx, cx      // add high-word result
-.endm
-
-
-// umul32: dx:ax = ax:cx * 00:bx
-.macro  M_LMUL_axcx_00bx
-        // mult high-word
-        mul     bx
-        xchg    ax, cx      // save high-word result, get low
-        // mult low-word
-        mul     bx
-        // add high-word
-        add     dx, cx      // add high-word result
-.endm
-
-
-// shld: dx:ax <<= 8
-.macro  M_shld_8
-        mov     dh, dl
-        mov     dl, ah
-        mov     ah, al
-        xor     al, al
-.endm
-
-.macro  M_shld_8_bp h l
-        mov     dx, word ptr[bp+h]
-        mov     ax, word ptr[bp+l]
-        M_shld_8
-        mov     word ptr[bp+h], dx
-        mov     word ptr[bp+l], ax
-.endm
-
-
-// shld: dx:ax >>= 11
-.macro  M_shrd_11
-#if 0
-        mov     al, ah
-        mov     ah, dl
-        mov     dl, dh
-        xor     dh, dh
-        shr     dx
-        rcr     ax
-        shr     dx
-        rcr     ax
-        shr     dx
-        rcr     ax
-#else
-    // WARNING: this trashes "bx" !
-        mov     cl, 11
-        mov     bx, dx      // save dx
-        shr     ax, cl
-        shr     dx, cl
-        mov     cl, 5       // cl = 16 - cl
-        shl     bx, cl
-        or      ax, bx
-#endif
-.endm
-
-.macro  M_shrd_11_bp h l
-        mov     dx, word ptr[bp+h]
-        mov     ax, word ptr[bp+l]
-        M_shrd_11
-        mov     word ptr[bp+h], dx
-        mov     word ptr[bp+l], ax
-.endm
-
-.macro  M_shrd_11_disi_bp h l
-        mov     dx, di
-        mov     ax, si
-        M_shrd_11
-        mov     word ptr[bp+h], dx
-        mov     word ptr[bp+l], ax
-.endm
-
-
-// shld: dx:ax <<= cl; trashes cl and register "r1" (bx, di, si or bp)
-//   REQUIRED: 0 <= cl <= 15
-.macro  M_shld_00_15 r1
-        mov     r1, ax      // save ax
-        shl     dx, cl
-        shl     ax, cl
-        sub     cl, 16
-        neg     cl          // cl = 16 - cl
-        shr     r1, cl
-        or      dx, r1
-.endm
-
-// shld: dx:ax <<= cl; trashes cl
-//   REQUIRED: 16 <= cl <= 32
-.macro  M_shld_16_32
-        sub     cl, 16
-        shl     ax, cl
-        mov     dx, ax
-        xor     ax, ax
-.endm
-
-// shld: dx:ax <<= cl; trashes cl and register "r1" (bx, di, si or bp)
-//   REQUIRED: 0 <= cl <= 32
-.macro  M_shld r1
-        local   L1, L2
-        cmp     cl, 16
-        jaes    L1
-// 0 <= cl <= 15
-        M_shld_00_15 r1
-        jmps    L2
-L1:
-// 16 <= cl <= 32
-        M_shld_16_32
-L2:
-.endm
-
-
-/*************************************************************************
-// support code (see cleanasm.py)
+//
 **************************************************************************/

 section LZMA_DEC99
-
-.macro  M_PIA
-        M_WCC_PIA
-.endm
-.macro  M_PIA1
-#if 1
-        local   L1
-        inc     ax
-        jnes    L1
-        //add     dx, __AHINCR
-        add     dh, __AHINCR >> 8
-L1:
-#else
-        add     ax, 1
-        sbb     bl, bl
-        and     bl, __AHINCR >> 8
-        add     dh, bl
-#endif
-.endm
-.macro  M_PTC
-        M_WCC_PTC
-.endm
-.macro  M_PTC_JNE l
-        cmp     ax, bx
-        jnes    l
-        cmp     dx, cx
-        jnes    l
-.endm
-.macro  M_PTS
+.macro LZMA_DEC99_dummy
 .endm


-/*************************************************************************
-//
-**************************************************************************/
-
 // init
 section LZMA_DEC00
        //.byte   0xcc
@ -288,13 +110,23 @@ section LZMA_DEC00
    ignore_reloc_overflow lzma_properties


+//#define ret
 section LZMA_DEC10
 .arch   i8086, nojumps
+#define SMALL 1
+#include "lzma_m.h"
 #include "lzma_d_cs.S"
+#undef SMALL
 section LZMA_DEC20
 .arch   i8086, nojumps
+#define SMALL 1
+//#define FAST 1
+#include "lzma_m.h"
 #include "lzma_d_cf.S"
+//#undef FAST
+#undef SMALL
 .arch   i8086, jumps
+#undef ret


 // cleanup
--- a/src/stub/src/arch/i086/lzma_m.h
+++ b/src/stub/src/arch/i086/lzma_m.h
@ -0,0 +1,340 @@
+/*
+;  lzma_m.h -- 16-bit assembly
+;
+;  This file is part of the UPX executable compressor.
+;
+;  Copyright (C) 2006-2007 Markus Franz Xaver Johannes Oberhumer
+;  All Rights Reserved.
+;
+;  UPX and the UCL library are free software; you can redistribute them
+;  and/or modify them under the terms of the GNU General Public License as
+;  published by the Free Software Foundation; either version 2 of
+;  the License, or (at your option) any later version.
+;
+;  This program is distributed in the hope that it will be useful,
+;  but WITHOUT ANY WARRANTY; without even the implied warranty of
+;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;  GNU General Public License for more details.
+;
+;  You should have received a copy of the GNU General Public License
+;  along with this program; see the file COPYING.
+;  If not, write to the Free Software Foundation, Inc.,
+;  59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+;
+;  Markus F.X.J. Oberhumer
+;  <markus@oberhumer.com>
+;  http://www.oberhumer.com/opensource/upx/
+;
+*/
+
+
+#ifndef __AHSHIFT
+
+/* DOS real mode */
+#define __AHSHIFT        12
+#define __AHINCR         (1 << __AHSHIFT)  /* 4096 */
+
+
+/*************************************************************************
+// support macros: PIA, PIS, PTS, PTC
+**************************************************************************/
+
+// huge pointer add: dx:ax = dx:ax + cx:bx
+.macro  M_PIA_small
+        add     ax, bx
+        adc     cx, 0
+#if 1
+        // code size: 8 bytes
+        // i086: 2+4+56+3 == 65 clocks
+        // i286: 2+2+17+2 == 23 clocks
+        mov     bx, cx
+        mov     cl, __AHSHIFT
+        shl     bx, cl
+        add     dx, bx
+#else
+        // code size: 8 bytes
+        // i086: 2+4+20+3 == 29 clocks
+        // i286: 2+2+ 9+2 == 15 clocks
+        mov     bl, cl
+        mov     cl, __AHSHIFT - 8
+        shl     bl, cl
+        add     dh, bl
+#endif
+.endm
+
+.macro  M_PIA_fast
+        add     ax, bx
+        adc     cx, 0
+        // code size: 10 bytes
+        // i086: 4*2+3 == 11 clocks
+        // i286: 4*2+2 == 10 clocks
+        shl     cl
+        shl     cl
+        shl     cl
+        shl     cl
+        add     dh, cl
+.endm
+
+
+.macro  M_PIA1_small
+        // code size: 6 bytes
+        // i086: 3+16   == 19 clocks (jump taken)
+        // i286: 2+ 7+m ~~ 11 clocks (jump taken)
+        local   L1
+        inc     ax
+        jnes    L1
+        //add     dx, __AHINCR
+        add     dh, __AHINCR >> 8
+L1:
+.endm
+
+.macro  M_PIA1_fast
+    // WARNING: this trashes "bx" !
+        // code size: 10 bytes
+        // i086: 3+3+3+3 == 12 clocks
+        // i286: 2+2+2+2 ==  8 clocks
+        add     ax, 1
+        sbb     bl, bl
+        and     bl, __AHINCR >> 8
+        add     dh, bl
+.endm
+
+
+#if 0
+// huge pointer sub: dx:ax = dx:ax - cx:bx
+.macro  M_PIS
+        sub     ax, bx
+        adc     cx, 0
+        mov     bx, cx
+        mov     cl, __AHSHIFT
+        shl     bx, cl
+        sub     dx, bx
+.endm
+#endif
+
+
+// huge pointer diff: dx:ax = dx:ax - cx:bx
+.macro  M_PTS
+#if 0
+// normalize
+// FIXME
+// subtract
+        sub     ax, bx
+        sbb     dx, cx
+#endif
+.endm
+
+
+// huge pointer compare: set zero and carry flags: dx:ax cmp cx:bx
+.macro  M_PTC
+// NOTE: no pointer normalization!
+        local   L1
+        cmp     dx, cx
+        jnes    L1
+        cmp     ax, bx
+L1:
+.endm
+
+.macro  M_PTC_JNE l
+        cmp     ax, bx
+        jnes    l
+        cmp     dx, cx
+        jnes    l
+.endm
+
+
+/*************************************************************************
+// support macros: LMUL, shld, shrd
+**************************************************************************/
+
+// umul32: dx:ax = dx:ax * 00:bx
+.macro  M_LMUL_dxax_00bx
+        // mult high-word
+        mov     cx, ax      // save ax
+        mov     ax, dx
+        mul     bx
+        xchg    ax, cx      // save high-word result, get saved ax
+        // mult low-word
+        mul     bx          // dx:ax := ax * bx
+        // add high-word
+        add     dx, cx      // add high-word result
+.endm
+
+
+// umul32: dx:ax = dx:ax * word ptr [bx]
+.macro  M_LMUL_dxax_00bx_ptr
+        // mult high-word
+        mov     cx, ax      // save ax
+        mov     ax, dx
+        mul     word ptr [bx]
+        xchg    ax, cx      // save high-word result, get saved ax
+        // mult low-word
+        mul     word ptr [bx]
+        // add high-word
+        add     dx, cx      // add high-word result
+.endm
+
+
+// umul32: dx:ax = ax:cx * 00:bx
+.macro  M_LMUL_axcx_00bx
+        // mult high-word
+        mul     bx
+        xchg    ax, cx      // save high-word result, get low
+        // mult low-word
+        mul     bx
+        // add high-word
+        add     dx, cx      // add high-word result
+.endm
+
+
+// shld: dx:ax <<= 8
+.macro  M_shld_8
+        mov     dh, dl
+        mov     dl, ah
+        mov     ah, al
+        xor     al, al
+.endm
+
+.macro  M_shld_8_bp h l
+        mov     dx, word ptr[bp+h]
+        mov     ax, word ptr[bp+l]
+        M_shld_8
+        mov     word ptr[bp+h], dx
+        mov     word ptr[bp+l], ax
+.endm
+
+
+// shld: dx:ax <<= cl; trashes cl and register "r1" (bx, di, si or bp)
+//   REQUIRED: 0 <= cl <= 15
+.macro  M_shld_00_15 r1
+        mov     r1, ax      // save ax
+        shl     dx, cl
+        shl     ax, cl
+        sub     cl, 16
+        neg     cl          // cl = 16 - cl
+        shr     r1, cl
+        or      dx, r1
+.endm
+
+// shld: dx:ax <<= cl; trashes cl
+//   REQUIRED: 16 <= cl <= 32
+.macro  M_shld_16_32
+        sub     cl, 16
+        shl     ax, cl
+        mov     dx, ax
+        xor     ax, ax
+.endm
+
+// shld: dx:ax <<= cl; trashes cl and register "r1" (bx, di, si or bp)
+//   REQUIRED: 0 <= cl <= 32
+.macro  M_shld r1
+        local   L1, L2
+        cmp     cl, 16
+        jaes    L1
+// 0 <= cl <= 15
+        M_shld_00_15 r1
+        jmps    L2
+L1:
+// 16 <= cl <= 32
+        M_shld_16_32
+L2:
+.endm
+
+
+// shld: dx:ax >>= 11
+.macro  M_shrd_11_small
+    // WARNING: this trashes "bx" !
+        // code size: 14 bytes
+        // i086: 4+2+52+52+4+28+3 == 145 clocks
+        // i286: 2+2+16+16+2+10+2 ==  50 clocks
+        mov     cl, 11
+        mov     bx, dx      // save dx
+        shr     ax, cl
+        shr     dx, cl
+        mov     cl, 5       // cl = 16 - cl
+        shl     bx, cl
+        or      ax, bx
+.endm
+
+.macro  M_shrd_11_fast
+        // code size: 20 bytes
+        // i086: 21 clocks
+        // i286: 20 clocks
+        mov     al, ah
+        mov     ah, dl
+        mov     dl, dh
+        xor     dh, dh
+        shr     dx
+        rcr     ax
+        shr     dx
+        rcr     ax
+        shr     dx
+        rcr     ax
+.endm
+
+
+.macro  M_shrd_11_bp_small h l
+        mov     dx, word ptr[bp+h]
+        mov     ax, word ptr[bp+l]
+        M_shrd_11_small
+        mov     word ptr[bp+h], dx
+        mov     word ptr[bp+l], ax
+.endm
+
+.macro  M_shrd_11_bp_fast h l
+        mov     dx, word ptr[bp+h]
+        mov     ax, word ptr[bp+l]
+        M_shrd_11_fast
+        mov     word ptr[bp+h], dx
+        mov     word ptr[bp+l], ax
+.endm
+
+
+.macro  M_shrd_11_disi_bp_small h l
+        mov     dx, di
+        mov     ax, si
+        M_shrd_11_small
+        mov     word ptr[bp+h], dx
+        mov     word ptr[bp+l], ax
+.endm
+
+.macro  M_shrd_11_disi_bp_fast h l
+        mov     dx, di
+        mov     ax, si
+        M_shrd_11_fast
+        mov     word ptr[bp+h], dx
+        mov     word ptr[bp+l], ax
+.endm
+
+
+/*************************************************************************
+//
+**************************************************************************/
+
+#endif /* ifndef __AHSHIFT */
+
+
+#undef M_PIA
+#undef M_PIA1
+#undef M_shrd_11
+#undef M_shrd_11_bp
+#undef M_shrd_11_disi_bp
+
+#if defined(FAST)
+#  define M_PIA                 M_PIA_fast
+#  define M_PIA1                M_PIA1_fast
+#  define M_shrd_11             M_shrd_11_fast
+#  define M_shrd_11_bp          M_shrd_11_bp_fast
+#  define M_shrd_11_disi_bp     M_shrd_11_disi_bp_fast
+#elif defined(SMALL)
+#  define M_PIA                 M_PIA_small
+#  define M_PIA1                M_PIA1_small
+#  define M_shrd_11             M_shrd_11_small
+#  define M_shrd_11_bp          M_shrd_11_bp_small
+#  define M_shrd_11_disi_bp     M_shrd_11_disi_bp_small
+#else
+#  error
+#endif
+
+
+// vi:ts=4:et
--- a/src/stub/src/arch/i086/macros.S
+++ b/src/stub/src/arch/i086/macros.S
@ -136,76 +136,6 @@
 #define         jnes    jnzs


-/*
-; =============
-; ============= HUGE POINTER SUPPORT
-; =============
-*/
-
-/* DOS real mode */
-#define __AHSHIFT        12
-#define __AHINCR         (1 << __AHSHIFT)  /* 4096 */
-
-
-#if 1
-// huge pointer add: dx:ax = dx:ax + cx:bx
-.macro          M_WCC_PIA
-                add     ax, bx
-                adc     cx, 0
-                mov     bx, cx
-                mov     cl, __AHSHIFT
-                shl     bx, cl
-                add     dx, bx
-.endm
-#endif
-
-
-#if 0
-// huge pointer sub: dx:ax = dx:ax - cx:bx
-.macro          M_WCC_PIS
-                sub     ax, bx
-                adc     cx, 0
-                mov     bx, cx
-                mov     cl, __AHSHIFT
-                shl     bx, cl
-                sub     dx, bx
-.endm
-#endif
-
-
-#if 0
-// huge pointer diff: dx:ax = dx:ax - cx:bx
-.macro          M_WCC_PTS
-// normalize
-// FIXME
-// subtract
-                sub     ax, bx
-                sbb     dx, cx
-.endm
-#endif
-
-
-#if 1
-// huge pointer compare: set zero and carry flags: dx:ax cmp cx:bx
-.macro          M_WCC_PTC
-// FIXME: should we normalize the pointers ???
-                local   L1
-                cmp     dx, cx
-                jnes    L1
-                cmp     ax, bx
-L1:
-.endm
-#endif
-
-
-#if 0
-// umul32: dx:ax = dx:ax * cx:bx
-.macro          M_WCC_U4M
-// FIXME
-.endm
-#endif
-
-
 /*
 ; =============
 ; ============= 16-BIT CALLTRICK & JUMPTRICK
@ -237,6 +167,7 @@ section         CALLTRI2
                lodsw
                loop    cjt16_L1

+
 /*
 ; =============
 */
@ -272,6 +203,7 @@ section         CALLTRI6
 cjt16_L2:
 .endm

+
 /*
 ; vi:ts=8:et:nowrap
 */