/*
* Copyright (c) 2007-2014 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
#include <arm/asm.h>
#include <arm/proc_reg.h>
#include <mach_kdp.h>
#include "assym.s"
.text
.align 12
.align 2
.globl EXT(resume_idle_cpu)
LEXT(resume_idle_cpu)
// r0 set to BootArgs phys address
// r1 set to cpu data phys address
LOAD_ADDR(lr, arm_init_idle_cpu)
b L_start_cpu_0
.globl EXT(start_cpu)
LEXT(start_cpu)
// r0 set to BootArgs phys address
// r1 set to cpu data phys address
LOAD_ADDR(lr, arm_init_cpu)
b L_start_cpu_0
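// L_start_cpu_0 is the common entry shared by resume_idle_cpu and start_cpu.
// On entry: r0 = BootArgs phys address, r1 = cpu data phys address, and
// lr = virtual address of the init routine to return into once running virtual.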
L_start_cpu_0:
cpsid if // Disable IRQ and FIQ
// Turn on the L1 I-cache and branch prediction early
mcr p15, 0, r11, c7, c5, 0 // ICIALLU: invalidate the icache (operand value ignored)
isb // before moving on
mrc p15, 0, r11, c1, c0, 0 // read mmu control into r11
orr r11, r11, #(SCTLR_ICACHE | SCTLR_PREDIC) // enable i-cache, branch prediction
mcr p15, 0, r11, c1, c0, 0 // set mmu control
dsb // ensure mmu settings are in place
isb // before moving on
// Get the kernel's phys & virt addr, and size from BootArgs
ldr r8, [r0, BA_PHYS_BASE] // Get the phys base in r8
ldr r9, [r0, BA_VIRT_BASE] // Get the virt base in r9
ldr r10, [r0, BA_MEM_SIZE] // Get the mem size in r10
// Set the base of the translation table into the MMU
ldr r4, [r0, BA_TOP_OF_KERNEL_DATA] // Get the top of kernel data
orr r5, r4, #(TTBR_SETUP & 0x00FF) // set up PTW memory attributes (low byte)
orr r5, r5, #(TTBR_SETUP & 0xFF00) // set up PTW memory attributes (high byte)
mcr p15, 0, r5, c2, c0, 0 // write kernel to translation table base 0
mcr p15, 0, r5, c2, c0, 1 // also to translation table base 1
mov r5, #TTBCR_N_1GB_TTB0 // set the split between TTBR0 and TTBR1
mcr p15, 0, r5, c2, c0, 2 // and set up the translation control reg
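// With TTBCR.N nonzero, TTBR0 translates only the bottom 2^(32-N) bytes of
// the address space and TTBR1 covers the remainder; going by its name,
// TTBCR_N_1GB_TTB0 presumably encodes a 1GB TTBR0 region. Both TTBRs point
// at the same boot tables here, so the split is benign at this stage.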
ldr r2, [r1, CPU_NUMBER_GS] // Get cpu number
mcr p15, 0, r2, c13, c0, 3 // Write TPIDRURO
ldr sp, [r1, CPU_INTSTACK_TOP] // Get interrupt stack top
sub sp, sp, SS_SIZE // Set stack pointer
sub r0, r1, r8 // Convert to virtual address
add r0, r0, r9
b join_start
.align 2
.globl EXT(_start)
LEXT(_start)
// r0 has the boot-args pointer
// r1 set to zero
mov r1, #0
LOAD_ADDR(lr, arm_init)
cpsid if // Disable IRQ and FIQ
// Turn on the L1 I-cache and branch prediction early
mcr p15, 0, r11, c7, c5, 0 // ICIALLU: invalidate the icache (operand value ignored)
isb // before moving on
mrc p15, 0, r11, c1, c0, 0 // read mmu control into r11
orr r11, r11, #(SCTLR_ICACHE | SCTLR_PREDIC) // enable i-cache, branch prediction
mcr p15, 0, r11, c1, c0, 0 // set mmu control
dsb // ensure mmu settings are in place
isb // before moving on
// Get the kernel's phys & virt addr, and size from boot_args.
ldr r8, [r0, BA_PHYS_BASE] // Get the phys base in r8
ldr r9, [r0, BA_VIRT_BASE] // Get the virt base in r9
ldr r10, [r0, BA_MEM_SIZE] // Get the mem size in r10
#define LOAD_PHYS_ADDR(reg, label) \
LOAD_ADDR(reg, label) ; \
add reg, reg, r8
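// LOAD_PHYS_ADDR adds the physical base (r8, from BA_PHYS_BASE) to the
// loaded symbol address so that the stores below land at the physical
// location of ExceptionVectorsTable; the MMU is still off at this point,
// so virtual addresses would not resolve.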
// Take this opportunity to patch the targets for the exception vectors
LOAD_ADDR(r4, fleh_reset)
LOAD_PHYS_ADDR(r5, ExceptionVectorsTable)
str r4, [r5]
LOAD_ADDR(r4, fleh_undef)
add r5, #4
str r4, [r5]
LOAD_ADDR(r4, fleh_swi)
add r5, #4
str r4, [r5]
LOAD_ADDR(r4, fleh_prefabt)
add r5, #4
str r4, [r5]
LOAD_ADDR(r4, fleh_dataabt)
add r5, #4
str r4, [r5]
LOAD_ADDR(r4, fleh_addrexc)
add r5, #4
str r4, [r5]
LOAD_ADDR(r4, fleh_irq)
add r5, #4
str r4, [r5]
LOAD_ADDR(r4, fleh_decirq)
add r5, #4
str r4, [r5]
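// The eight handler addresses above are stored in ARM exception-vector
// order: reset, undefined instruction, SWI, prefetch abort, data abort,
// address exception (the legacy/reserved slot), IRQ, and fleh_decirq in
// the final (FIQ/decrementer) slot.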
// The virtual address of arm_init_tramp is awkward to retrieve once we're
// running virtual, so take the opportunity to stash it locally now so that
// we don't run into issues later.
// This is a pretty miserable solution, but it should be enough for the moment.
LOAD_ADDR(r4, arm_init_tramp)
adr r5, arm_init_tramp_addr
str r4, [r5]
#undef LOAD_PHYS_ADDR
// Set the base of the translation table into the MMU
ldr r4, [r0, BA_TOP_OF_KERNEL_DATA] // Get the top of kernel data
orr r5, r4, #(TTBR_SETUP & 0x00FF) // set up PTW memory attributes (low byte)
orr r5, r5, #(TTBR_SETUP & 0xFF00) // set up PTW memory attributes (high byte)
mcr p15, 0, r5, c2, c0, 0 // write kernel to translation table base 0
mcr p15, 0, r5, c2, c0, 1 // also to translation table base 1
mov r5, #TTBCR_N_1GB_TTB0 // set the split between TTBR0 and TTBR1
mcr p15, 0, r5, c2, c0, 2 // and set up the translation control reg
// Mark the entries invalid in the 4 page trampoline translation table
// Mark the entries invalid in the 4 page CPU translation table
// Mark the entries invalid in the one page table for the final 1MB (if used)
// Mark the entries invalid in the one page table for HIGH_EXC_VECTORS
mov r5, r4 // local copy of base
mov r11, #ARM_TTE_TYPE_FAULT // invalid entry template
mov r2, PGBYTES >> 2 // number of ttes/page
add r2, r2, r2, LSL #2 // 8 tte pages + 2 pte pages to clear: multiply by 5...
mov r2, r2, LSL #1 // ...then by 2, covering 10 pages of entries
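// Worked out: (PGBYTES >> 2) entries per page, times 5, times 2 gives the
// entries for 10 pages total: 4 trampoline tte pages + 4 cpu tte pages +
// 1 pte page for the final 1MB + 1 pte page for HIGH_EXC_VECTORS, per the
// list above.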
invalidate_tte:
str r11, [r5] // store the invalid tte
add r5, r5, #4 // increment tte pointer
subs r2, r2, #1 // decrement count
bne invalidate_tte
// create default section tte template
mov r6, #ARM_TTE_TYPE_BLOCK // use block mapping entries
mov r7, #(ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_DEFAULT) & 0xFF)
orr r7, r7, #(ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_DEFAULT) & 0xFF00)
orr r7, r7, #(ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_DEFAULT) & 0xF0000)
orr r6, r6, r7 // with default cache attrs
mov r7, #ARM_TTE_BLOCK_AP(AP_RWNA) // Set kernel rw, user no access
orr r7, r7, #(ARM_TTE_BLOCK_AP(AP_RWNA) & 0xFF00)
orr r7, r7, #(ARM_TTE_BLOCK_AP(AP_RWNA) & 0xF0000)
orr r6, r6, r7 // Set RWNA protection
orr r6, r6, #ARM_TTE_BLOCK_AF // Set the access flag
orr r6, r6, #ARM_TTE_BLOCK_SH // Set shareability
// Set up the V=P mapping for the 1 MB section around the current pc
lsr r7, pc, #ARM_TT_L1_SHIFT // Extract tte index for pc addr
add r5, r4, r7, LSL #2 // convert tte index to tte pointer
lsl r7, r7, #ARM_TT_L1_SHIFT // Truncate pc to 1MB aligned addr
orr r11, r7, r6 // make tte entry value
str r11, [r5] // store tte
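// This identity (V=P) entry keeps the current code fetchable at the moment
// the MMU is enabled below: the pc still holds a physical address, which
// must translate to itself until we branch to a virtual address.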
// Set up the virtual mapping for the kernel using 1MB direct section TTE entries
mov r7, r8 // Save original phys base
add r5, r4, r9, LSR #ARM_TT_L1_SHIFT-2 // convert vaddr to tte pointer
mov r3, #ARM_TT_L1_SIZE // set 1MB boundary
mapveqp:
cmp r3, r10 // Less than a full 1MB section remaining?
bgt mapveqpL2 // If so, finish with a coarse (L2) entry
orr r11, r7, r6 // make tte entry value
str r11, [r5], #4 // store tte and move to next
add r7, r7, #ARM_TT_L1_SIZE // move to next phys addr
subs r10, r10, #ARM_TT_L1_SIZE // subtract tte size
bne mapveqp
b doneveqp // end is 1MB aligned, and we're done
mapveqpL2:
// The end is not 1MB aligned, so steal a page and set up L2 entries within
// Coarse entry first
add r6, r4, PGBYTES * 8 // L2 table page sits past the 8 L1 tte pages
mov r11, r6
orr r6, #ARM_TTE_TYPE_TABLE // coarse entry
str r6, [r5] // store coarse tte entry
// Fill in the L2 entries
mov r5, r11
// create pte template
mov r2, #ARM_PTE_TYPE // default pte type
orr r2, r2, #(ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) & 0xff) // with default cache attrs
orr r2, r2, #(ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) & 0xff00)
orr r2, r2, #(ARM_PTE_AP(AP_RWNA) & 0xff) // set RWNA protection
orr r2, r2, #(ARM_PTE_AP(AP_RWNA) & 0xff00)
orr r2, r2, #ARM_PTE_AF // Set access
orr r2, r2, #ARM_PTE_SH // Set shareability
storepte:
orr r11, r7, r2 // make pte entry value
str r11, [r5], #4 // store pte and move to next
add r7, r7, PGBYTES // move to next phys addr
subs r10, r10, PGBYTES // subtract pte size
bne storepte
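// Note: the loop above assumes the remaining size in r10 is a multiple of
// PGBYTES so that the subs lands exactly on zero; a non-page-multiple mem
// size would wrap and overrun the table.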
doneveqp:
// Insert page table page for high address exception vectors into translation table
mov r5, #0xff000000 // part of virt HIGH_EXC_VECTORS (HACK!)
orr r5, r5, #0x00ff0000 // rest of virt HIGH_EXC_VECTORS (HACK!)
mov r5, r5, LSR #ARM_TT_L1_SHIFT // convert virt addr to index
add r5, r4, r5, LSL #2 // convert to tte pointer
add r6, r4, PGBYTES * 9 // get page table base (past 4 + 4 + 1 tte/pte pages)
mov r7, #(ARM_TTE_TABLE_MASK & 0xFFFF) // ARM_TTE_TABLE_MASK low halfword
movt r7, #(ARM_TTE_TABLE_MASK >> 16) // ARM_TTE_TABLE_MASK top halfword
and r11, r6, r7 // apply mask
orr r11, r11, #ARM_TTE_TYPE_TABLE // mark it as a coarse page table
str r11, [r5] // store tte entry for page table
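// Boot table layout relative to r4 (top of kernel data), as used here:
// pages 0-3: trampoline L1 table; pages 4-7: cpu (kernel) L1 table;
// page 8: L2 table for the final partial 1MB; page 9: L2 table for
// HIGH_EXC_VECTORS (hence the PGBYTES * 8 and PGBYTES * 9 offsets).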
// create pte template
mov r2, #ARM_PTE_TYPE // pte type
orr r2, r2, #(ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) & 0x00ff) // default cache attrs
orr r2, r2, #(ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) & 0xff00)
orr r2, r2, #(ARM_PTE_AP(AP_RWNA) & 0x00ff) // set RWNA protection
orr r2, r2, #(ARM_PTE_AP(AP_RWNA) & 0xff00)
orr r2, r2, #ARM_PTE_AF // Set access
orr r2, r2, #ARM_PTE_SH // Set shareability
// Now initialize the page table entry for the exception vectors
mov r5, #0xff000000 // part of HIGH_EXC_VECTORS
orr r5, r5, #0x00ff0000 // rest of HIGH_EXC_VECTORS
mov r7, #(ARM_TT_L2_INDEX_MASK & 0xFFFF) // ARM_TT_L2_INDEX_MASK low halfword
movt r7, #(ARM_TT_L2_INDEX_MASK >> 16) // ARM_TT_L2_INDEX_MASK top halfword
and r5, r5, r7 // mask for getting index
mov r5, r5, LSR #ARM_TT_L2_SHIFT // get page table index
add r5, r6, r5, LSL #2 // convert to pte pointer
LOAD_ADDR(r11, ExceptionVectorsBase) // get address of vectors addr
sub r11, r11, r9 // convert to physical address
add r11, r11, r8
mov r7, #(ARM_PTE_PAGE_MASK & 0xFFFF) // ARM_PTE_PAGE_MASK low halfword
movt r7, #(ARM_PTE_PAGE_MASK >> 16) // ARM_PTE_PAGE_MASK top halfword
and r11, r11, r7 // insert masked address into pte
orr r11, r11, r2 // add template bits
str r11, [r5] // store pte by base and index
// Clean & invalidate the dcache by set/way
mov r11, #0 // start at set 0, way 0 (level 1)
cleanflushway:
cleanflushline:
mcr p15, 0, r11, c7, c14, 2 // DCCISW: clean & invalidate dcache line by set/way
add r11, r11, #1 << MMU_I7SET // increment set index
tst r11, #1 << (MMU_NSET + MMU_I7SET) // look for overflow
beq cleanflushline
bic r11, r11, #1 << (MMU_NSET + MMU_I7SET) // clear set overflow
adds r11, r11, #1 << MMU_I7WAY // increment way
bcc cleanflushway // loop
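// Set/way loop mechanics: r11 packs the set index (at MMU_I7SET) and the
// way index (at MMU_I7WAY) into the DCCISW operand; the tst detects set
// overflow, and the adds on the way field sets carry once every way has
// been visited, terminating the loop.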
#if __ARM_L2CACHE__
// Clean & invalidate the L2 cache by set/way
mov r11, #2 // set/way level field selects L2: (level - 1) << 1
invall2flushway:
invall2flushline:
mcr p15, 0, r11, c7, c14, 2 // DCCISW: clean & invalidate L2 line by set/way
add r11, r11, #1 << L2_I7SET // increment set index
tst r11, #1 << (L2_NSET + L2_I7SET) // look for overflow
beq invall2flushline
bic r11, r11, #1 << (L2_NSET + L2_I7SET) // clear set overflow
adds r11, r11, #1 << L2_I7WAY // increment way
bcc invall2flushway // loop
#endif
mov r11, #0
mcr p15, 0, r11, c13, c0, 3 // Write TPIDRURO
LOAD_ADDR(sp, intstack_top) // Get interrupt stack top
sub sp, sp, SS_SIZE // Set stack pointer
sub r0, r0, r8 // Convert to virtual address
add r0, r0, r9
join_start:
// kernel page table is setup
// lr set to return handler function virtual address
// r0 set to return handler argument virtual address
// sp set to interrupt context stack virtual address
// Cpu specific configuration
#ifdef ARMA7
#if __ARMA7_SMP__
mrc p15, 0, r11, c1, c0, 1 // read auxiliary control register (ACTLR)
orr r11, r11, #(1<<6) // set the SMP bit
mcr p15, 0, r11, c1, c0, 1 // write ACTLR
isb
#endif
#endif
mrs r11, cpsr // Get cpsr
bic r11, #0x100 // clear CPSR.A to allow async aborts
msr cpsr_x, r11 // Update cpsr
mov r11, #0
mcr p15, 0, r11, c8, c7, 0 // invalidate all TLB entries
mcr p15, 0, r11, c7, c5, 0 // invalidate the icache
// set DACR
mov r11, #(ARM_DAC_SETUP & 0xFFFF) // ARM_DAC_SETUP low halfword
movt r11, #(ARM_DAC_SETUP >> 16) // ARM_DAC_SETUP top halfword
mcr p15, 0, r11, c3, c0, 0 // write to dac register
// Set PRRR
mov r11, #(PRRR_SETUP & 0xFFFF) // PRRR_SETUP low halfword
movt r11, #(PRRR_SETUP >> 16) // PRRR_SETUP top halfword
mcr p15, 0, r11, c10, c2, 0 // write to PRRR register
// Set NMRR
mov r11, #(NMRR_SETUP & 0xFFFF) // NMRR_SETUP low halfword
movt r11, #(NMRR_SETUP >> 16) // NMRR_SETUP top halfword
mcr p15, 0, r11, c10, c2, 1 // write to NMRR register
// set SCTLR
mrc p15, 0, r11, c1, c0, 0 // read system control
bic r11, r11, #SCTLR_ALIGN // force off alignment exceptions
mov r7, #(SCTLR_AFE|SCTLR_TRE) // Access flag, TEX remap
orr r7, r7, #(SCTLR_HIGHVEC | SCTLR_ICACHE | SCTLR_PREDIC)
orr r7, r7, #(SCTLR_DCACHE | SCTLR_ENABLE)
#if (__ARM_ENABLE_SWAP__ == 1)
orr r7, r7, #SCTLR_SW // SWP/SWPB Enable
#endif
orr r11, r11, r7 // or in the default settings
mcr p15, 0, r11, c1, c0, 0 // set mmu control
dsb // ensure mmu settings are in place
isb // before moving on
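// The MMU is now on. The boot tables map both the V=P section we are
// executing from and the kernel's virtual range, so the branches below can
// land at virtual addresses; non-boot CPUs additionally hop through
// arm_init_tramp to move TTBR onto the kernel tables proper.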
#if __ARM_VFP__
// Initialize the VFP coprocessors.
mrc p15, 0, r2, c1, c0, 2 // read coprocessor control register
mov r3, #15 // 0xF: full-access value for two CPACR fields
orr r2, r2, r3, LSL #20 // enable cp10 and cp11 (VFP/NEON)
mcr p15, 0, r2, c1, c0, 2 // write coprocessor control register
isb
#endif /* __ARM_VFP__ */
// Running virtual. Prepare to call init code
cmp r1, #0 // Test if invoked from start
beq join_start_1 // Branch if yes
ldr r7, arm_init_tramp_addr // Load trampoline address
bx r7 // Branch to virtual trampoline address
// Loading the virtual address for arm_init_tramp is a rather ugly
// problem. There is probably a better solution, but for the moment,
// patch the address in locally so that loading it is trivial
arm_init_tramp_addr:
.long 0
.globl EXT(arm_init_tramp)
LEXT(arm_init_tramp)
mrc p15, 0, r5, c2, c0, 0 // Read translation table base 0
add r5, r5, PGBYTES * 4 // get kernel page table base (past 4 boot tte pages)
mcr p15, 0, r5, c2, c0, 0 // write kernel to translation table base 0
mcr p15, 0, r5, c2, c0, 1 // also to translation table base 1
isb
mov r5, #0
mcr p15, 0, r5, c8, c7, 0 // Flush all TLB entries
dsb // ensure mmu settings are in place
isb // before moving on
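// On the trampoline path we now run purely on the kernel page tables; the
// cold-boot path reaches join_start_1 directly, still on the boot tables.
// Both paths finish with VFP setup and a bx to the routine staged in lr.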
join_start_1:
#if __ARM_VFP__
// Enable VFP for the bootstrap thread context.
// VFP is enabled for the arm_init path as we may
// execute VFP code before we can handle an undef.
fmrx r2, fpexc // get fpexc
orr r2, #FPEXC_EN // set the enable bit
fmxr fpexc, r2 // set fpexc
mov r2, #FPSCR_DEFAULT // set default fpscr
fmxr fpscr, r2 // set fpscr
#endif /* __ARM_VFP__ */
mov r7, #0 // Zero the frame pointer (terminates backtraces)
bx lr
LOAD_ADDR_GEN_DEF(arm_init)
LOAD_ADDR_GEN_DEF(arm_init_cpu)
LOAD_ADDR_GEN_DEF(arm_init_idle_cpu)
LOAD_ADDR_GEN_DEF(arm_init_tramp)
LOAD_ADDR_GEN_DEF(fleh_reset)
LOAD_ADDR_GEN_DEF(ExceptionVectorsTable)
LOAD_ADDR_GEN_DEF(fleh_undef)
LOAD_ADDR_GEN_DEF(fleh_swi)
LOAD_ADDR_GEN_DEF(fleh_prefabt)
LOAD_ADDR_GEN_DEF(fleh_dataabt)
LOAD_ADDR_GEN_DEF(fleh_addrexc)
LOAD_ADDR_GEN_DEF(fleh_irq)
LOAD_ADDR_GEN_DEF(fleh_decirq)
#include "globals_asm.h"
/* vim: set ts=4: */