src/stub: flush datacache before msync

This commit is contained in:
Markus F.X.J. Oberhumer 2025-04-29 17:44:47 +02:00
parent 5cec09966e
commit e509bb87e9
7 changed files with 101 additions and 5 deletions

View File

@ -107,6 +107,13 @@ Psync: .globl Psync
// Tail of Psync (its entry is above this hunk): move the start of the range
// down to a page boundary, ask the kernel to flush caches for the range,
// then msync it.
bic r12,arg1,r12 // lo frag: r12= arg1 & ~r12  (r12 is set above this hunk; presumably the page mask)
sub arg1,arg1,r12 // page align lo end
add arg2,arg2,r12 // lengthen by the same fragment, so the hi end does not move
stmdb sp!,{r0,r1,r2} // save lo, len, and r2 (presumably the third msync argument; verify against caller)
add r1,r1,r0 // hi= lo + len: the flush is given (lo, hi) rather than (lo, len)
mov r2,#0 // NOTE(review): Linux documents the third cacheflush argument as "flags", which must be 0 (not CSSELR_DCACHE) -- confirm
do_sys2 __ARM_NR_cacheflush // no check of the result follows
ldmia sp!,{r0,r1,r2} // restore lo, len, r2 for msync
do_sys __NR_msync; ret
mmap_privanon: .globl mmap_privanon

View File

@ -30,6 +30,9 @@
#define ARM_OLDABI 1
#include "arch/arm/v4a/macros.S"
#include "MAX_ELF_HDR.S"
// Numbers for the ARM-private system calls (compare
// linux/arch/arm/include/uapi/asm/unistd.h).
// NOTE(review): a base of 0 is what Linux uses for EABI, yet ARM_OLDABI is
// defined above -- confirm that do_sys2 supplies any offset OABI needs.
__NR_SYSCALL_BASE = 0
__ARM_NR_BASE= 0xf0000 + __NR_SYSCALL_BASE
__ARM_NR_cacheflush = 2 + __ARM_NR_BASE // evaluates to 0xf0002 with the base above
NBPW= 4
sz_Elf32_Ehdr = 13*4
@ -139,6 +142,13 @@ Psync: .globl Psync
// Tail of Psync (its entry is above this hunk): move the start of the range
// down to a page boundary, ask the kernel to flush caches for the range,
// then continue at msync.
bic r12,arg1,r12 // r12= arg1 & ~r12  (r12 is set above this hunk; presumably the page mask)
sub arg1,arg1,r12 // round the lo end down by that fragment
add arg2,arg2,r12 // lengthen by the same fragment, so the hi end does not move
stmdb sp!,{r0,r1,r2} // save lo, len, and r2 (presumably the third msync argument; verify against caller)
add r1,r1,r0 // hi= lo + len: the flush is given (lo, hi) rather than (lo, len)
mov r2,#0 // NOTE(review): Linux documents the third cacheflush argument as "flags", which must be 0 (not CSSELR_DCACHE) -- confirm
do_sys2 __ARM_NR_cacheflush // no check of the result follows
ldmia sp!,{r0,r1,r2} // restore lo, len, r2 for msync
b msync // branch to msync with the restored registers (msync is defined outside this hunk)
L05:

View File

@ -128,6 +128,44 @@ Psync: .globl Psync
// Tail of Psync (its entry is above this hunk): page-align the range in
// x0/x1, do cache maintenance over it, then fall into msync.
bic x8,x0,x8 // x8= x0 & ~x8  (x8 is set above this hunk; presumably the page mask)
sub x0,x0,x8 // round lo down by that fragment
add x1,x1,x8 // lengthen by the same fragment, so the hi end does not move
// Sync contents of data cache into RAM.
// Linux should do this implicitly, but apparently not.
// NOTE(review): "dc cvau" cleans only to the Point of Unification, and the
// data-side loop is skipped altogether when CTR_EL0.IDC is set, so this
// sequence makes instruction fetch coherent with earlier stores rather than
// forcing data out to RAM -- confirm that this is the intent.
//
// Register use below: x3= lo (later the ic cursor), x4= hi, x5= dc cursor,
// x6= CTR_EL0 (later its low 4 bits), x8= negated line size.
// None of the instructions below write x0, x1 or x2.
CTR_IDC_SHIFT= 28 // bit number tested to skip the dc loop
CTR_DIC_SHIFT= 29 // bit number tested to skip the ic loop
mov x3,x0 // lo
add x4,x0,x1 // hi
sync_cache_range: // (void *lo= x3, void *const hi= x4)
mrs x6,ctr_el0 // cache type register: line sizes and the IDC/DIC bits
tbnz w6,#CTR_IDC_SHIFT,dc_not_dirty // IDC set: skip the data-side clean
ubfx x5,x6,#16,#4 // -2+ log2(dline_size)
mov x8,#-4; lsl x8,x8,x5 // sz_dline mask: x8= -(4 << x5)= -dline_size
and x5,x8,x3 // round down to dc line
// cmp x5,x4; b.hs dc_done
// (with the guard above disabled, the loop body runs at least once, even if lo >= hi)
dc_loop:
dc cvau,x5 // sync dline
sub x5,x5,x8 // next dline: subtracting -dline_size steps forward one line
cmp x5,x4; b.lo dc_loop // unsigned compare: repeat while cursor < hi
//dc_done:
dc_not_dirty:
dsb ish // why here if dc not dirty?
// NOTE(review): presumably so that earlier stores are complete before the
// instruction-side invalidate even when no dc op was issued -- confirm.
tbnz w6,#CTR_DIC_SHIFT,ic_not_dirty // DIC set: skip the instruction-side invalidate
and x6,x6,#0xf // -2+ log2(iline_size)
mov x8,#-4; lsl x8,x8,x6 // sz_iline mask: x8= -(4 << x6)= -iline_size
and x3,x8,x3 // round down to ic line
// cmp x3,x4; b.hs ic_done
// (with the guard above disabled, the loop body runs at least once, even if lo >= hi)
ic_loop:
ic ivau,x3 // sync iline
sub x3,x3,x8 // next iline: subtracting -iline_size steps forward one line
cmp x3,x4; b.lo ic_loop // unsigned compare: repeat while cursor < hi
//ic_done:
dsb ish // wait for the ic operations above to complete
ic_not_dirty:
isb // instruction barrier; reached on both the DIC and the non-DIC path
// fall into msync
msync: .globl msync
do_sys __NR_msync; ret

View File

@ -91,6 +91,42 @@ Psync: .globl Psync
// Tail of Psync (its entry is above this hunk): page-align the range in
// x0/x1, do cache maintenance over it, then issue the msync system call.
bic x8,x0,x8 // x8= x0 & ~x8  (x8 is set above this hunk; presumably the page mask)
sub x0,x0,x8 // round lo down by that fragment
add x1,x1,x8 // lengthen by the same fragment, so the hi end does not move
// Sync contents of data cache into RAM.
// Linux should do this implicitly, but apparently not.
// NOTE(review): "dc cvau" cleans only to the Point of Unification, and the
// data-side loop is skipped altogether when CTR_EL0.IDC is set, so this
// sequence makes instruction fetch coherent with earlier stores rather than
// forcing data out to RAM -- confirm that this is the intent.
//
// Register use below: x3= lo (later the ic cursor), x4= hi, x5= dc cursor,
// x6= CTR_EL0 (later its low 4 bits), x8= negated line size.
// None of the instructions below write x0, x1 or x2.
CTR_IDC_SHIFT= 28 // bit number tested to skip the dc loop
CTR_DIC_SHIFT= 29 // bit number tested to skip the ic loop
mov x3,x0 // lo
add x4,x0,x1 // hi
sync_cache_range: // (void *lo= x3, void *const hi= x4)
mrs x6,ctr_el0 // cache type register: line sizes and the IDC/DIC bits
tbnz w6,#CTR_IDC_SHIFT,dc_not_dirty // IDC set: skip the data-side clean
ubfx x5,x6,#16,#4 // -2+ log2(dline_size)
mov x8,#-4; lsl x8,x8,x5 // sz_dline mask: x8= -(4 << x5)= -dline_size
and x5,x8,x3 // round down to dc line
// cmp x5,x4; b.hs dc_done
// (with the guard above disabled, the loop body runs at least once, even if lo >= hi)
dc_loop:
dc cvau,x5 // sync dline
sub x5,x5,x8 // next dline: subtracting -dline_size steps forward one line
cmp x5,x4; b.lo dc_loop // unsigned compare: repeat while cursor < hi
//dc_done:
dc_not_dirty:
dsb ish // why here if dc not dirty?
// NOTE(review): presumably so that earlier stores are complete before the
// instruction-side invalidate even when no dc op was issued -- confirm.
tbnz w6,#CTR_DIC_SHIFT,ic_not_dirty // DIC set: skip the instruction-side invalidate
and x6,x6,#0xf // -2+ log2(iline_size)
mov x8,#-4; lsl x8,x8,x6 // sz_iline mask: x8= -(4 << x6)= -iline_size
and x3,x8,x3 // round down to ic line
// cmp x3,x4; b.hs ic_done
// (with the guard above disabled, the loop body runs at least once, even if lo >= hi)
ic_loop:
ic ivau,x3 // sync iline
sub x3,x3,x8 // next iline: subtracting -iline_size steps forward one line
cmp x3,x4; b.lo ic_loop // unsigned compare: repeat while cursor < hi
//ic_done:
dsb ish // wait for the ic operations above to complete
ic_not_dirty:
isb // instruction barrier; reached on both the DIC and the non-DIC path
// fall into msync
do_sys __NR_msync; ret
fold: // enter here (x0= &so_info; x1= &{argc,argv,envp,lr}
@ -206,11 +242,6 @@ brk:
// readlink: issue the __NR_readlink system call via do_sys, then return.
// No register is set up here, so arguments are used as the caller left them.
readlink:
do_sys __NR_readlink; ret
// __sync_cache_range: exported wrapper whose whole body is the included
// file, followed by a return.  The included code is not visible here.
.globl __sync_cache_range
__sync_cache_range: // (void *lo, void *hi)
#include "arm64-sync-cache-range.S"
ret
get_sys_munmap: .globl get_sys_munmap // r0= system call instruction
#if defined(ARMEL_DARWIN) /*{*/
ldr w0,4*1 + munmap

View File

@ -334,6 +334,14 @@ Psync: .globl Psync
// Tail of Psync (its entry is above this hunk): page-align the range in
// a0/a1, flush the data cache for it, then msync it.
and TMP,a0,v0 // TMP= a0 & v0  (v0 is set above this hunk; presumably the in-page offset mask)
sub a0,TMP // round lo down by that fragment
add a1,TMP // lengthen by the same fragment, so the hi end does not move
__NR_cacheflush = 147+ __NR_Linux
/* asm/cachectl.h */
ICACHE= 1<<0
DCACHE= 1<<1
// cacheflush takes its cache selector in a2, the same register that carries
// the third msync argument.  Park the incoming a2 on the stack and reload it
// afterwards; otherwise msync would be entered with DCACHE (2) in a2 instead
// of the value the caller passed.
addiu sp,-2*NBPW // temporary two-word slot, released again before msync
sw a2,0*NBPW(sp)
li a2,DCACHE
li v0,__NR_cacheflush; syscall // ignore failure
lw a2,0*NBPW(sp) // reload; the next instruction does not read a2 (load delay)
addiu sp,2*NBPW // release the temporary slot
li v0,__NR_msync; syscall
jr ra
addiu sp,2*NBPW // presumably the jr delay slot: drops the frame set up above this hunk

View File

@ -86,6 +86,7 @@ no_unf:
POP2 a0,a1 // MATCH_81 dst, len
add a1,a1,a0 // lo, hi: a1= dst + len
addi a1,a1,-1 // highest covered addr
// Line size in bytes used by the rounding below.
// NOTE(review): hard-coded, not queried from the hardware -- confirm that 32
// is no larger than the real line size on every supported CPU.
CACHELINE=32
ori a0,a0,-1+ CACHELINE // highest addr on cache line: sets the low bits covered by CACHELINE-1

View File

@ -86,6 +86,7 @@ no_unf:
POP2 a0,a1 // MATCH_81 dst, len
add a1,a1,a0 // lo, hi: a1= dst + len
addi a1,a1,-1 // highest covered addr
// Line size in bytes used by the rounding below.
// NOTE(review): hard-coded, not queried from the hardware -- confirm that 32
// is no larger than the real line size on every supported CPU.
CACHELINE=32
ori a0,a0,-1+ CACHELINE // highest addr on cache line: sets the low bits covered by CACHELINE-1