; sys_icache_invalidate - invalidate instruction cache for an address range. ; specific to the Power PC and Mac OS X (due to use of ; __cpu_capabilities). ; The only reason to write this much assembler for this simple job is that ; some revisions of the Power PC processor had bugs in this area that require ; precise ordering and timing of the instructions that affect the cache. ; This isn't in the processor manuals, it's only in the errata sheets. /* need this to write code that will assemble as dynamic or static */ #include #undef PICIFY_REG #define PICIFY_REG r6 ; Default to 32 bytes per cache line for the cases where _cpu_capabilities ; is not yet set up. This gives slower, but correct behaviour, even on ; processors with a different cache-line size. .text .align 4, 0x0 /* sys_icache_invalidate(char *start, long len) */ .globl _sys_icache_invalidate _sys_icache_invalidate: mr. r4, r4 beqlr cr0 ; if (length == 0) return ; we trash r5 - r9, but apparently these are scratch registers ; for this algorithm, we need to know the cache line size. ; get the cpu capabilities info, mask it off & shift. EXTERN_TO_REG(r7, __cpu_capabilities) ; cache_line_size = (_cpu_capabilities&(kCache32|kCache64|kCache128))<<3 rlwinm. r8, r7, 3, 24, 26 bne+ cr0, 2f ; if (cache_line_size == 0) li r8, 32 ; cache_line_size = 32; 2: neg r9, r8 ; r9 = cache_line_mask = ~(cache_line_size - 1) add r5, r3, r4 ; r5 = last_cache_line address=start+length-1 addi r5, r5, -1 ; r3 = first_cache_line address, rounded down to start of line and r3, r3, r9 mr r6, r3 ; save start addr. ; loop over address range, flushing data cache 3: dcbf 0, r6 ; flush data cache for address in r6 add r6, r6, r8 ; move to next cache line cmplw r6, r5 ; flush all lines <= addr. in r5 ble+ 3b ; if more cache lines left, repeat sync ; wait until all bus operations have finished ; loop over the same range, this time invalidating the instruction cache 4: icbi 0, r3 ; invalidate instruction cache line for r3 add r3, r3, r8 ; move to next cache line cmplw r3, r5 ; invalidate all lines <= addr. in r5 ble+ 4b ; if more cache lines left, repeat sync ; wait until all bus operations have finished isync ; toss any speculatively executed instructions blr