data cache prefetch
ppc_d_nrv2e.S l_lx_elfppc32.h committer: jreiser <jreiser> 1109034808 +0000
This commit is contained in:
parent
dbc01de9a0
commit
5ce60aad53
@ -1,4 +1,4 @@
|
||||
/* l_lx_elfppc32.h -- created from l_lx_elfppc32.bin, 432 (0x1b0) bytes
|
||||
/* l_lx_elfppc32.h -- created from l_lx_elfppc32.bin, 448 (0x1c0) bytes
|
||||
|
||||
This file is part of the UPX executable compressor.
|
||||
|
||||
@ -26,35 +26,36 @@
|
||||
*/
|
||||
|
||||
|
||||
#define LINUX_ELFPPC32_LOADER_ADLER32 0xf2b08e0d
|
||||
#define LINUX_ELFPPC32_LOADER_CRC32 0x2364b765
|
||||
#define LINUX_ELFPPC32_LOADER_ADLER32 0x337c930d
|
||||
#define LINUX_ELFPPC32_LOADER_CRC32 0xac495fea
|
||||
|
||||
unsigned char linux_elfppc32_loader[432] = {
|
||||
72, 0, 1,161,144,166, 0, 0,124,132, 26, 20, 60, 0,128, 0, /* 0x 0 */
|
||||
61, 32,128, 0, 56, 99,255,255, 56,165,255,255, 57, 64,255,255, /* 0x 10 */
|
||||
125,168, 2,166, 72, 0, 0, 40, 57, 32, 0, 1,125, 41, 28, 44, /* 0x 20 */
|
||||
56, 99, 0, 4,124, 9, 0, 64,125, 41, 72, 20, 97, 41, 0, 1, /* 0x 30 */
|
||||
78,128, 0, 32,141, 3, 0, 1,157, 5, 0, 1,124, 9, 0, 64, /* 0x 40 */
|
||||
125, 41, 74, 20, 65,162,255,213, 65,129,255,236, 56,224, 0, 1, /* 0x 50 */
|
||||
72, 0, 0, 20, 56,231,255,255,125, 41, 72, 21, 65,162,255,189, /* 0x 60 */
|
||||
124,231, 57, 20,125, 41, 72, 21, 65,162,255,177,124,231, 57, 20, /* 0x 70 */
|
||||
124, 9, 0, 64,125, 41, 74, 20, 65,162,255,161, 65,160,255,216, /* 0x 80 */
|
||||
57, 0, 0, 0, 52,231,255,253, 84,231, 64, 46, 65,128, 0, 32, /* 0x 90 */
|
||||
140, 67, 0, 1,124,234, 16,249,125, 74, 14,112, 65,130, 0,136, /* 0x a0 */
|
||||
112, 66, 0, 1, 65,162, 0, 80, 72, 0, 0, 20,124, 9, 0, 64, /* 0x b0 */
|
||||
125, 41, 74, 20, 65,162,255,101, 65,161, 0, 60, 57, 0, 0, 1, /* 0x c0 */
|
||||
124, 9, 0, 64,125, 41, 74, 20, 65,162,255, 81, 65,161, 0, 40, /* 0x d0 */
|
||||
125, 41, 72, 21, 65,162,255, 69,125, 8, 65, 20,124, 9, 0, 64, /* 0x e0 */
|
||||
125, 41, 74, 20, 65,162,255, 53, 65,160,255,232, 57, 8, 0, 2, /* 0x f0 */
|
||||
72, 0, 0, 16,125, 41, 72, 21, 65,162,255, 33,125, 8, 65, 20, /* 0x 100 */
|
||||
32,234,250,255, 57, 8, 0, 2,125, 8, 1,148,124,234, 42, 20, /* 0x 110 */
|
||||
125, 9, 3,166,141, 7, 0, 1,157, 5, 0, 1, 66, 0,255,248, /* 0x 120 */
|
||||
75,255,255, 28,128, 6, 0, 0,125,168, 3,166, 56,165, 0, 1, /* 0x 130 */
|
||||
56, 99, 0, 1,124,160, 40, 80,124,100, 24, 80,144,166, 0, 0, /* 0x 140 */
|
||||
78,128, 0, 32,127,200, 2,166, 56,192, 0, 50,128,126, 0, 4, /* 0x 150 */
|
||||
56,160, 0, 7,124, 99,242, 20, 56,128, 16, 0, 56, 99, 16, 11, /* 0x 160 */
|
||||
56, 0, 0, 90, 84, 99, 0, 38, 68, 0, 0, 2, 65,131, 0, 32, /* 0x 170 */
|
||||
124,104, 3,166, 56,193, 0,124,124,101, 27,120,127,233, 3,166, /* 0x 180 */
|
||||
128,158, 0, 4, 56,126, 0, 12, 78,128, 4, 32,127,224, 0, 8, /* 0x 190 */
|
||||
148, 33,255,128,188, 65, 0, 4,127,232, 2,166, 75,255,255,169 /* 0x 1a0 */
|
||||
unsigned char linux_elfppc32_loader[448] = {
|
||||
72, 0, 1,177,124, 0, 41,236,144,166, 0, 0,124,132, 26, 20, /* 0x 0 */
|
||||
60, 0,128, 0, 61, 32,128, 0, 56, 99,255,255, 56,165,255,255, /* 0x 10 */
|
||||
57, 64,255,255,125,168, 2,166, 72, 0, 1, 12, 57, 32, 0, 1, /* 0x 20 */
|
||||
125, 41, 28, 44, 56, 99, 0, 4,124, 9, 0, 64,125, 41, 72, 20, /* 0x 30 */
|
||||
97, 41, 0, 1, 78,128, 0, 32,141, 3, 0, 1,157, 5, 0, 1, /* 0x 40 */
|
||||
124, 9, 0, 64,125, 41, 74, 20, 65,162,255,213, 65,129,255,236, /* 0x 50 */
|
||||
56,224, 0, 1, 72, 0, 0, 20, 56,231,255,255,125, 41, 72, 21, /* 0x 60 */
|
||||
65,162,255,189,124,231, 57, 20,125, 41, 72, 21, 65,162,255,177, /* 0x 70 */
|
||||
124,231, 57, 20,124, 9, 0, 64,125, 41, 74, 20, 65,162,255,161, /* 0x 80 */
|
||||
65,160,255,216, 57, 0, 0, 0, 52,231,255,253, 84,231, 64, 46, /* 0x 90 */
|
||||
65,128, 0, 32,140, 67, 0, 1,124,234, 16,249,125, 74, 14,112, /* 0x a0 */
|
||||
65,130, 0,148,112, 66, 0, 1, 65,162, 0, 80, 72, 0, 0, 20, /* 0x b0 */
|
||||
124, 9, 0, 64,125, 41, 74, 20, 65,162,255,101, 65,161, 0, 60, /* 0x c0 */
|
||||
57, 0, 0, 1,124, 9, 0, 64,125, 41, 74, 20, 65,162,255, 81, /* 0x d0 */
|
||||
65,161, 0, 40,125, 41, 72, 21, 65,162,255, 69,125, 8, 65, 20, /* 0x e0 */
|
||||
124, 9, 0, 64,125, 41, 74, 20, 65,162,255, 53, 65,160,255,232, /* 0x f0 */
|
||||
57, 8, 0, 2, 72, 0, 0, 16,125, 41, 72, 21, 65,162,255, 33, /* 0x 100 */
|
||||
125, 8, 65, 20, 32,234,250,255, 57, 8, 0, 2,125, 8, 1,148, /* 0x 110 */
|
||||
124,234, 42, 20,125, 9, 3,166,141, 7, 0, 1,157, 5, 0, 1, /* 0x 120 */
|
||||
66, 0,255,248, 56,224, 1, 0,124, 7, 41,236,124, 7, 26, 44, /* 0x 130 */
|
||||
75,255,255, 16,128, 6, 0, 0,125,168, 3,166, 56,165, 0, 1, /* 0x 140 */
|
||||
56, 99, 0, 1,124,160, 40, 80,124,100, 24, 80,144,166, 0, 0, /* 0x 150 */
|
||||
78,128, 0, 32,127,200, 2,166, 56,192, 0, 50,128,126, 0, 4, /* 0x 160 */
|
||||
56,160, 0, 7,124, 99,242, 20, 56,128, 16, 0, 56, 99, 16, 11, /* 0x 170 */
|
||||
56, 0, 0, 90, 84, 99, 0, 38, 68, 0, 0, 2, 65,131, 0, 32, /* 0x 180 */
|
||||
124,104, 3,166, 56,193, 0,124,124,101, 27,120,127,233, 3,166, /* 0x 190 */
|
||||
128,158, 0, 4, 56,126, 0, 12, 78,128, 4, 32,127,224, 0, 8, /* 0x 1a0 */
|
||||
148, 33,255,128,188, 65, 0, 4,127,232, 2,166, 75,255,255,169 /* 0x 1b0 */
|
||||
};
|
||||
|
||||
@ -31,6 +31,8 @@
|
||||
|
||||
#include "ppc_regs.h"
|
||||
|
||||
SZ_DLINE=128 # size of data cache line in Apple G5
|
||||
|
||||
/* Returns 0 on success; non-zero on failure. */
|
||||
decompress: # (uchar const *src, size_t lsrc, uchar *dst, size_t &ldst)
|
||||
|
||||
@ -47,6 +49,8 @@ decompress: # (uchar const *src, size_t lsrc, uchar *dst, size_t &ldst)
|
||||
#define bits a6
|
||||
#define disp a7
|
||||
|
||||
dcbtst 0,dst # prime dcache for store
|
||||
|
||||
stw dst,0(ldst) # original dst
|
||||
add lsrc,lsrc,src # input eof
|
||||
|
||||
@ -57,7 +61,7 @@ decompress: # (uchar const *src, size_t lsrc, uchar *dst, size_t &ldst)
|
||||
li disp,-1 # initial displacement
|
||||
|
||||
mflr t3 # return address
|
||||
b top_n2e
|
||||
b bot_n2e
|
||||
|
||||
/* jump on next bit, with branch prediction: y==>likely; n==>unlikely
|
||||
cr0 is set by the cmpl ["compare logical"==>unsigned]:
|
||||
@ -95,8 +99,8 @@ get32:
|
||||
|
||||
lit_n2e:
|
||||
#define tmp len
|
||||
lbzu tmp,1(src)
|
||||
stbu tmp,1(dst)
|
||||
lbzu tmp,1(src) # tmp= *++src;
|
||||
stbu tmp,1(dst) # *++dst= tmp;
|
||||
#undef tmp
|
||||
top_n2e:
|
||||
jnextb1y lit_n2e
|
||||
@ -151,6 +155,17 @@ short_n2e:
|
||||
stbu tmp,1(dst)
|
||||
#undef tmp
|
||||
bdnz+ short_n2e
|
||||
bot_n2e:
|
||||
/* This "prefetch for store" is simple, small, and effective. Matches
|
||||
usually occur more frequently than once per 128 bytes, but G4 line size
|
||||
is only 32 bytes anyway. Assume that an 'unnecessary' dcbtst costs only
|
||||
about as much as a hit. The counter register is free at top_n2e, so we could
|
||||
pace the dcbtst optimally; but that takes 7 or 8 instructions of space.
|
||||
*/
|
||||
li back,2*SZ_DLINE
|
||||
dcbtst back,dst # 2 lines ahead [-1 for stbu]
|
||||
dcbt back,src # jump start auto prefetch at page boundary
|
||||
/* Auto prefetch for Read quits at page boundary; needs 2 misses to restart. */
|
||||
b top_n2e
|
||||
#undef back
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user