data cache prefetch

ppc_d_nrv2e.S l_lx_elfppc32.h

committer: jreiser <jreiser> 1109034808 +0000
This commit is contained in:
John Reiser 2005-02-22 01:13:28 +00:00
parent dbc01de9a0
commit 5ce60aad53
2 changed files with 50 additions and 34 deletions

View File

@ -1,4 +1,4 @@
/* l_lx_elfppc32.h -- created from l_lx_elfppc32.bin, 432 (0x1b0) bytes
/* l_lx_elfppc32.h -- created from l_lx_elfppc32.bin, 448 (0x1c0) bytes
This file is part of the UPX executable compressor.
@ -26,35 +26,36 @@
*/
#define LINUX_ELFPPC32_LOADER_ADLER32 0xf2b08e0d
#define LINUX_ELFPPC32_LOADER_CRC32 0x2364b765
#define LINUX_ELFPPC32_LOADER_ADLER32 0x337c930d
#define LINUX_ELFPPC32_LOADER_CRC32 0xac495fea
unsigned char linux_elfppc32_loader[432] = {
72, 0, 1,161,144,166, 0, 0,124,132, 26, 20, 60, 0,128, 0, /* 0x 0 */
61, 32,128, 0, 56, 99,255,255, 56,165,255,255, 57, 64,255,255, /* 0x 10 */
125,168, 2,166, 72, 0, 0, 40, 57, 32, 0, 1,125, 41, 28, 44, /* 0x 20 */
56, 99, 0, 4,124, 9, 0, 64,125, 41, 72, 20, 97, 41, 0, 1, /* 0x 30 */
78,128, 0, 32,141, 3, 0, 1,157, 5, 0, 1,124, 9, 0, 64, /* 0x 40 */
125, 41, 74, 20, 65,162,255,213, 65,129,255,236, 56,224, 0, 1, /* 0x 50 */
72, 0, 0, 20, 56,231,255,255,125, 41, 72, 21, 65,162,255,189, /* 0x 60 */
124,231, 57, 20,125, 41, 72, 21, 65,162,255,177,124,231, 57, 20, /* 0x 70 */
124, 9, 0, 64,125, 41, 74, 20, 65,162,255,161, 65,160,255,216, /* 0x 80 */
57, 0, 0, 0, 52,231,255,253, 84,231, 64, 46, 65,128, 0, 32, /* 0x 90 */
140, 67, 0, 1,124,234, 16,249,125, 74, 14,112, 65,130, 0,136, /* 0x a0 */
112, 66, 0, 1, 65,162, 0, 80, 72, 0, 0, 20,124, 9, 0, 64, /* 0x b0 */
125, 41, 74, 20, 65,162,255,101, 65,161, 0, 60, 57, 0, 0, 1, /* 0x c0 */
124, 9, 0, 64,125, 41, 74, 20, 65,162,255, 81, 65,161, 0, 40, /* 0x d0 */
125, 41, 72, 21, 65,162,255, 69,125, 8, 65, 20,124, 9, 0, 64, /* 0x e0 */
125, 41, 74, 20, 65,162,255, 53, 65,160,255,232, 57, 8, 0, 2, /* 0x f0 */
72, 0, 0, 16,125, 41, 72, 21, 65,162,255, 33,125, 8, 65, 20, /* 0x 100 */
32,234,250,255, 57, 8, 0, 2,125, 8, 1,148,124,234, 42, 20, /* 0x 110 */
125, 9, 3,166,141, 7, 0, 1,157, 5, 0, 1, 66, 0,255,248, /* 0x 120 */
75,255,255, 28,128, 6, 0, 0,125,168, 3,166, 56,165, 0, 1, /* 0x 130 */
56, 99, 0, 1,124,160, 40, 80,124,100, 24, 80,144,166, 0, 0, /* 0x 140 */
78,128, 0, 32,127,200, 2,166, 56,192, 0, 50,128,126, 0, 4, /* 0x 150 */
56,160, 0, 7,124, 99,242, 20, 56,128, 16, 0, 56, 99, 16, 11, /* 0x 160 */
56, 0, 0, 90, 84, 99, 0, 38, 68, 0, 0, 2, 65,131, 0, 32, /* 0x 170 */
124,104, 3,166, 56,193, 0,124,124,101, 27,120,127,233, 3,166, /* 0x 180 */
128,158, 0, 4, 56,126, 0, 12, 78,128, 4, 32,127,224, 0, 8, /* 0x 190 */
148, 33,255,128,188, 65, 0, 4,127,232, 2,166, 75,255,255,169 /* 0x 1a0 */
unsigned char linux_elfppc32_loader[448] = {
72, 0, 1,177,124, 0, 41,236,144,166, 0, 0,124,132, 26, 20, /* 0x 0 */
60, 0,128, 0, 61, 32,128, 0, 56, 99,255,255, 56,165,255,255, /* 0x 10 */
57, 64,255,255,125,168, 2,166, 72, 0, 1, 12, 57, 32, 0, 1, /* 0x 20 */
125, 41, 28, 44, 56, 99, 0, 4,124, 9, 0, 64,125, 41, 72, 20, /* 0x 30 */
97, 41, 0, 1, 78,128, 0, 32,141, 3, 0, 1,157, 5, 0, 1, /* 0x 40 */
124, 9, 0, 64,125, 41, 74, 20, 65,162,255,213, 65,129,255,236, /* 0x 50 */
56,224, 0, 1, 72, 0, 0, 20, 56,231,255,255,125, 41, 72, 21, /* 0x 60 */
65,162,255,189,124,231, 57, 20,125, 41, 72, 21, 65,162,255,177, /* 0x 70 */
124,231, 57, 20,124, 9, 0, 64,125, 41, 74, 20, 65,162,255,161, /* 0x 80 */
65,160,255,216, 57, 0, 0, 0, 52,231,255,253, 84,231, 64, 46, /* 0x 90 */
65,128, 0, 32,140, 67, 0, 1,124,234, 16,249,125, 74, 14,112, /* 0x a0 */
65,130, 0,148,112, 66, 0, 1, 65,162, 0, 80, 72, 0, 0, 20, /* 0x b0 */
124, 9, 0, 64,125, 41, 74, 20, 65,162,255,101, 65,161, 0, 60, /* 0x c0 */
57, 0, 0, 1,124, 9, 0, 64,125, 41, 74, 20, 65,162,255, 81, /* 0x d0 */
65,161, 0, 40,125, 41, 72, 21, 65,162,255, 69,125, 8, 65, 20, /* 0x e0 */
124, 9, 0, 64,125, 41, 74, 20, 65,162,255, 53, 65,160,255,232, /* 0x f0 */
57, 8, 0, 2, 72, 0, 0, 16,125, 41, 72, 21, 65,162,255, 33, /* 0x 100 */
125, 8, 65, 20, 32,234,250,255, 57, 8, 0, 2,125, 8, 1,148, /* 0x 110 */
124,234, 42, 20,125, 9, 3,166,141, 7, 0, 1,157, 5, 0, 1, /* 0x 120 */
66, 0,255,248, 56,224, 1, 0,124, 7, 41,236,124, 7, 26, 44, /* 0x 130 */
75,255,255, 16,128, 6, 0, 0,125,168, 3,166, 56,165, 0, 1, /* 0x 140 */
56, 99, 0, 1,124,160, 40, 80,124,100, 24, 80,144,166, 0, 0, /* 0x 150 */
78,128, 0, 32,127,200, 2,166, 56,192, 0, 50,128,126, 0, 4, /* 0x 160 */
56,160, 0, 7,124, 99,242, 20, 56,128, 16, 0, 56, 99, 16, 11, /* 0x 170 */
56, 0, 0, 90, 84, 99, 0, 38, 68, 0, 0, 2, 65,131, 0, 32, /* 0x 180 */
124,104, 3,166, 56,193, 0,124,124,101, 27,120,127,233, 3,166, /* 0x 190 */
128,158, 0, 4, 56,126, 0, 12, 78,128, 4, 32,127,224, 0, 8, /* 0x 1a0 */
148, 33,255,128,188, 65, 0, 4,127,232, 2,166, 75,255,255,169 /* 0x 1b0 */
};

View File

@ -31,6 +31,8 @@
#include "ppc_regs.h"
SZ_DLINE=128 # size of data cache line in Apple G5
/* Returns 0 on success; non-zero on failure. */
decompress: # (uchar const *src, size_t lsrc, uchar *dst, size_t &ldst)
@ -47,6 +49,8 @@ decompress: # (uchar const *src, size_t lsrc, uchar *dst, size_t &ldst)
#define bits a6
#define disp a7
dcbtst 0,dst # prime dcache for store
stw dst,0(ldst) # original dst
add lsrc,lsrc,src # input eof
@ -57,7 +61,7 @@ decompress: # (uchar const *src, size_t lsrc, uchar *dst, size_t &ldst)
li disp,-1 # initial displacement
mflr t3 # return address
b top_n2e
b bot_n2e
/* jump on next bit, with branch prediction: y==>likely; n==>unlikely
cr0 is set by the cmpl ["compare logical"==>unsigned]:
@ -95,8 +99,8 @@ get32:
lit_n2e:
#define tmp len
lbzu tmp,1(src)
stbu tmp,1(dst)
lbzu tmp,1(src) # tmp= *++src;
stbu tmp,1(dst) # *++dst= tmp;
#undef tmp
top_n2e:
jnextb1y lit_n2e
@ -151,6 +155,17 @@ short_n2e:
stbu tmp,1(dst)
#undef tmp
bdnz+ short_n2e
bot_n2e:
/* Cache-hint stub that runs just before control rejoins the main loop at
top_n2e. In this view it is reached by falling out of the short_n2e copy
loop (the bdnz+ directly above) and by the "b bot_n2e" at function entry.

This "prefetch for store" is simple, small, and effective. Matches
usually occur more frequently than once per 128 bytes, but G4 line size
is only 32 bytes anyway. Assume that an 'unnecessary' dcbtst costs only
about as much as a hit. The counter register is free at top_n2e, so we could
pace the dcbtst optimally; but that takes 7 or 8 instructions of space.

The 128 above is SZ_DLINE, documented at its definition as the data cache
line size of the Apple G5. The hints are issued once per pass through this
label, not once per cache line; that is the size-versus-pacing trade-off
described in the previous paragraph.
*/
li back,2*SZ_DLINE # back= byte offset of two cache lines (2*128 with SZ_DLINE=128)
dcbtst back,dst # 2 lines ahead [-1 for stbu]
/* NOTE(review): "-1 for stbu" presumably refers to dst being pre-incremented
by stbu, so dst addresses the last byte stored and the touched address is one
byte short of two full lines past the next store -- confirm.
*/
dcbt back,src # jump start auto prefetch at page boundary
/* Auto prefetch for Read quits at page boundary; needs 2 misses to restart. */
/* Hence the explicit touch of the input two lines ahead as well as the output. */
b top_n2e
/* "back" is a preprocessor alias whose #define is outside this view; it is
released here.
*/
#undef back