#include <platforms.h>
#include <mach_kdb.h>
#include <mach/i386/vm_param.h>
#include <string.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/machine.h>
#include <mach/time_value.h>
#include <kern/spl.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/misc_protos.h>
#include <kern/cpu_data.h>
#include <kern/processor.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <i386/pmap.h>
#include <i386/misc_protos.h>
#include <i386/ipl.h>
#include <i386/cpuid.h>
#include <mach/thread_status.h>
#include <pexpert/i386/efi.h>
#include <i386/i386_lowmem.h>
#include <i386/lowglobals.h>
#include <mach-o/loader.h>
#include <libkern/kernel_mach_header.h>
/*
 * Debug logging: in DEBUG builds DBG() forwards to kprintf() with a "DBG: "
 * prefix and PRINT_PMAP_MEMORY_TABLE enables the region-table dump in
 * i386_vm_init(); otherwise DBG() expands to nothing.
 */
#if DEBUG
#define DBG(x...) kprintf("DBG: " x)
#define PRINT_PMAP_MEMORY_TABLE
#else
#define DBG(x...)
#endif
/* Set by i386_vm_init() to sane_size, capped at FOURGIG >> 1 (2GB). */
vm_size_t mem_size = 0;
/* Read by i386_vm_init() as the first available physical address. */
vm_offset_t first_avail = 0;
/* Set by i386_vm_init() to the final value of sane_size. */
uint64_t max_mem;
/* Set by i386_vm_init() to sane_size before it is rounded up to 128MB. */
uint64_t mem_actual;
/*
 * Bytes of EFI regions counted as memory by i386_vm_init(); afterwards
 * rounded up to a 128MB multiple and possibly limited by maxmem.
 */
uint64_t sane_size = 0;
/* Defaults used when the "maxbouncepool" / "maxloreserve" boot-args are absent. */
#define MAXBOUNCEPOOL (128 * 1024 * 1024)
#define MAXLORESERVE ( 32 * 1024 * 1024)
extern unsigned int bsd_mbuf_cluster_reserve(void);
/* Physical base and byte size of the bounce pool; set by reserve_bouncepool(). */
uint32_t bounce_pool_base = 0;
uint32_t bounce_pool_size = 0;
static void reserve_bouncepool(uint32_t);
/* Physical address bounds computed by i386_vm_init(). */
pmap_paddr_t avail_start, avail_end;
vm_offset_t virtual_avail, virtual_end;
/*
 * Running page count: increased per region in i386_vm_init(), decreased as
 * pages are handed out; reported by pmap_free_pages().
 */
static pmap_paddr_t avail_remaining;
vm_offset_t static_memory_end = 0;
/* Kernel segment boundaries; sHIB/eHIB/stext/etext/sdata/edata are set in i386_vm_init(). */
vm_offset_t sHIB, eHIB, stext, etext, sdata, edata, end;
/* Cleared in pmap_lowmem_finalize() when stext and sdata are large-page aligned. */
boolean_t kernel_text_ps_4K = TRUE;
/* When set, pmap_lowmem_finalize() clears the RW bit on kernel text mappings. */
boolean_t wpkernel = TRUE;
extern void *KPTphys;
/* Base address and size of each kernel Mach-O segment looked up in i386_vm_init(). */
void *sectTEXTB; unsigned long sectSizeTEXT;
void *sectDATAB; unsigned long sectSizeDATA;
void *sectOBJCB; unsigned long sectSizeOBJC;
void *sectLINKB; unsigned long sectSizeLINK;
void *sectPRELINKB; unsigned long sectSizePRELINK;
void *sectHIBB; unsigned long sectSizeHIB;
void *sectINITPTB; unsigned long sectSizeINITPT;
extern int srv;
/* Per-EFI-memory-type byte tallies accumulated by i386_vm_init(). */
extern uint64_t firmware_Conventional_bytes;
extern uint64_t firmware_RuntimeServices_bytes;
extern uint64_t firmware_ACPIReclaim_bytes;
extern uint64_t firmware_ACPINVS_bytes;
extern uint64_t firmware_PalCode_bytes;
extern uint64_t firmware_Reserved_bytes;
extern uint64_t firmware_Unusable_bytes;
extern uint64_t firmware_other_bytes;
uint64_t firmware_MMIO_bytes;
/*
 * i386_vm_init
 *
 * Early physical-memory setup:
 *   1. Record the base and size of the kernel's Mach-O segments.
 *   2. Walk the EFI memory map passed in the boot args, tally bytes per
 *      firmware memory type and build the pmap_memory_regions[] table
 *      from the ranges treated as conventional memory.
 *   3. Derive sane_size / mem_size / max_mem, honouring the maxmem limit.
 *   4. Parse DMA / bounce-pool / low-reserve boot-args and, when memory
 *      reaches max_valid_dma_address, reserve the bounce pool.
 *   5. Call pmap_bootstrap().
 *
 * maxmem: memory limit in bytes; 0 means "no explicit limit" (then only the
 *         KERNEL_MAXMEM cap applies).
 * IA32e:  passed through unchanged to pmap_bootstrap().
 * args:   boot arguments; supplies the EFI memory map (MemoryMap,
 *         MemoryMapSize, MemoryMapDescriptorSize).
 *
 * Panics if the memory map descriptor size is zero.
 */
void
i386_vm_init(uint64_t	maxmem,
	     boolean_t	IA32e,
	     boot_args	*args)
{
	pmap_memory_region_t *pmptr;
	pmap_memory_region_t *prev_pmptr;
	EfiMemoryRange *mptr;
	unsigned int mcount;
	unsigned int msize;
	ppnum_t fap;
	unsigned int i;
	unsigned int safeboot;
	ppnum_t maxpg = 0;
	uint32_t pmap_type;
	uint32_t maxbouncepoolsize;
	uint32_t maxloreserve;
	uint32_t maxdmaaddr;

	/*
	 * Look up each kernel segment in the Mach-O header.  The third
	 * argument receives the segment size, so it must be the address of
	 * the matching sectSize* variable.
	 */
	sectTEXTB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__TEXT", &sectSizeTEXT);
	sectDATAB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__DATA", &sectSizeDATA);
	sectOBJCB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__OBJC", &sectSizeOBJC);
	sectLINKB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__LINKEDIT", &sectSizeLINK);
	sectHIBB = (void *)getsegdatafromheader(
		&_mh_execute_header, "__HIB", &sectSizeHIB);
	sectINITPTB = (void *)getsegdatafromheader(
		&_mh_execute_header, "__INITPT", &sectSizeINITPT);
	sectPRELINKB = (void *) getsegdatafromheader(
		&_mh_execute_header, "__PRELINK_TEXT", &sectSizePRELINK);

	/* Segment boundaries used later by pmap_lowmem_finalize(). */
	sHIB  = (vm_offset_t) sectHIBB;
	eHIB  = (vm_offset_t) sectHIBB + sectSizeHIB;
	stext = (vm_offset_t) sectTEXTB;
	etext = (vm_offset_t) sectTEXTB + sectSizeTEXT;
	sdata = (vm_offset_t) sectDATAB;
	edata = (vm_offset_t) sectDATAB + sectSizeDATA;

#if DEBUG
	kprintf("sectTEXTB = %p\n", sectTEXTB);
	kprintf("sectDATAB = %p\n", sectDATAB);
	kprintf("sectOBJCB = %p\n", sectOBJCB);
	kprintf("sectLINKB = %p\n", sectLINKB);
	kprintf("sectHIBB = %p\n", sectHIBB);
	kprintf("sectPRELINKB = %p\n", sectPRELINKB);
	kprintf("eHIB = %p\n", (void *) eHIB);
	kprintf("stext = %p\n", (void *) stext);
	kprintf("etext = %p\n", (void *) etext);
	kprintf("sdata = %p\n", (void *) sdata);
	kprintf("edata = %p\n", (void *) edata);
#endif

	vm_set_page_size();

	/*
	 * With vm_himemory_mode == 1 or the "-x" boot-arg, ignore physical
	 * pages at or above the 2^32-byte boundary.
	 */
	if ((1 == vm_himemory_mode) || PE_parse_boot_argn("-x", &safeboot, sizeof (safeboot))) {
		maxpg = 1 << (32 - I386_PGSHIFT);
	}

	avail_remaining = 0;
	avail_end = 0;
	pmptr = pmap_memory_regions;
	prev_pmptr = 0;
	pmap_memory_region_count = pmap_memory_region_current = 0;
	fap = (ppnum_t) i386_btop(first_avail);

	mptr = (EfiMemoryRange *)ml_static_ptovirt((vm_offset_t)args->MemoryMap);
	if (args->MemoryMapDescriptorSize == 0)
		panic("Invalid memory map descriptor size");
	msize = args->MemoryMapDescriptorSize;
	mcount = args->MemoryMapSize / msize;

#define FOURGIG 0x0000000100000000ULL

	/* Descriptors are msize bytes apart, so step by bytes rather than by element. */
	for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
		ppnum_t base, top;
		uint64_t region_bytes = 0;

		if (pmap_memory_region_count >= PMAP_MEMORY_REGIONS_SIZE) {
			kprintf("WARNING: truncating memory region count at %d\n", pmap_memory_region_count);
			break;
		}
		/* base/top are the first and last page numbers of the range. */
		base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT);
		top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1);
		region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT);
		pmap_type = mptr->Type;

		/*
		 * Tally bytes per firmware type.  Loader and boot-services
		 * ranges are folded into kEfiConventionalMemory; reserved,
		 * unusable, MMIO and unknown types do not count toward
		 * sane_size.
		 */
		switch (mptr->Type) {
		case kEfiLoaderCode:
		case kEfiLoaderData:
		case kEfiBootServicesCode:
		case kEfiBootServicesData:
		case kEfiConventionalMemory:
			pmap_type = kEfiConventionalMemory;
			sane_size += region_bytes;
			firmware_Conventional_bytes += region_bytes;
			break;
		case kEfiRuntimeServicesCode:
		case kEfiRuntimeServicesData:
			firmware_RuntimeServices_bytes += region_bytes;
			sane_size += region_bytes;
			break;
		case kEfiACPIReclaimMemory:
			firmware_ACPIReclaim_bytes += region_bytes;
			sane_size += region_bytes;
			break;
		case kEfiACPIMemoryNVS:
			firmware_ACPINVS_bytes += region_bytes;
			sane_size += region_bytes;
			break;
		case kEfiPalCode:
			firmware_PalCode_bytes += region_bytes;
			sane_size += region_bytes;
			break;
		case kEfiReservedMemoryType:
			firmware_Reserved_bytes += region_bytes;
			break;
		case kEfiUnusableMemory:
			firmware_Unusable_bytes += region_bytes;
			break;
		case kEfiMemoryMappedIO:
		case kEfiMemoryMappedIOPortSpace:
			firmware_MMIO_bytes += region_bytes;
			break;
		default:
			firmware_other_bytes += region_bytes;
			break;
		}

		kprintf("EFI region %d: type %u/%d, base 0x%x, top 0x%x\n",
			i, mptr->Type, pmap_type, base, top);

		/* Apply the optional page-number ceiling. */
		if (maxpg) {
			if (base >= maxpg)
				break;
			top = (top > maxpg) ? maxpg : top;
		}

		if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME ||
		    pmap_type != kEfiConventionalMemory) {
			/* Not usable as general memory; also prevents merging across it. */
			prev_pmptr = 0;
			continue;
		} else {
			/* Ranges entirely below the low-memory reserve are skipped. */
			if (top < I386_LOWMEM_RESERVED) {
				prev_pmptr = 0;
				continue;
			}
			if (top < fap) {
				/*
				 * Entire range lies below first_avail: record
				 * it with alloc == end.
				 */
				if (base >= I386_LOWMEM_RESERVED)
					pmptr->base = base;
				else
					pmptr->base = I386_LOWMEM_RESERVED;
				pmptr->alloc = pmptr->end = top;
				pmptr->type = pmap_type;
			}
			else if ( (base < fap) && (top > fap) ) {
				/*
				 * Range straddles first_avail: split it into
				 * a part ending at fap - 1 (alloc == end) and
				 * a part starting at fap.
				 * NOTE(review): this advances pmptr without
				 * re-checking PMAP_MEMORY_REGIONS_SIZE; confirm
				 * the table always has room for the extra entry.
				 */
				pmptr->base = base;
				pmptr->alloc = pmptr->end = (fap - 1);
				pmptr->type = pmap_type;
				pmptr++;
				pmap_memory_region_count++;
				pmptr->alloc = pmptr->base = fap;
				pmptr->type = pmap_type;
				pmptr->end = top;
			}
			else {
				/* Range starts at or above first_avail. */
				pmptr->alloc = pmptr->base = base;
				pmptr->type = pmap_type;
				pmptr->end = top;
			}

			if (i386_ptob(pmptr->end) > avail_end )
				avail_end = i386_ptob(pmptr->end);

			avail_remaining += (pmptr->end - pmptr->base);

			/*
			 * Merge with the previous entry when this one has the
			 * same type, has nothing allocated yet and starts on
			 * the page right after the previous entry's end.
			 */
			if (prev_pmptr &&
			    pmptr->type == prev_pmptr->type &&
			    pmptr->base == pmptr->alloc &&
			    pmptr->base == (prev_pmptr->end + 1)) {
				prev_pmptr->end = pmptr->end;
			} else {
				pmap_memory_region_count++;
				prev_pmptr = pmptr;
				pmptr++;
			}
		}
	}

#ifdef PRINT_PMAP_MEMORY_TABLE
	{
		/*
		 * Dump the region table and warn about any EFI range of a
		 * non-conventional type whose start or end falls inside a
		 * pmap region.
		 */
		unsigned int j;
		pmap_memory_region_t *p = pmap_memory_regions;
		addr64_t region_start, region_end;
		addr64_t efi_start, efi_end;

		for (j=0;j<pmap_memory_region_count;j++, p++) {
			kprintf("pmap region %d type %d base 0x%llx alloc 0x%llx top 0x%llx\n",
				j, p->type,
				(addr64_t) p->base << I386_PGSHIFT,
				(addr64_t) p->alloc << I386_PGSHIFT,
				(addr64_t) p->end << I386_PGSHIFT);
			region_start = (addr64_t) p->base << I386_PGSHIFT;
			region_end = ((addr64_t) p->end << I386_PGSHIFT) - 1;
			mptr = (EfiMemoryRange *) ml_static_ptovirt((vm_offset_t)args->MemoryMap);
			for (i=0; i<mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
				if (mptr->Type != kEfiLoaderCode &&
				    mptr->Type != kEfiLoaderData &&
				    mptr->Type != kEfiBootServicesCode &&
				    mptr->Type != kEfiBootServicesData &&
				    mptr->Type != kEfiConventionalMemory) {
					efi_start = (addr64_t)mptr->PhysicalStart;
					efi_end = efi_start + ((vm_offset_t)mptr->NumberOfPages << I386_PGSHIFT) - 1;
					if ((efi_start >= region_start && efi_start <= region_end) ||
					    (efi_end >= region_start && efi_end <= region_end)) {
						kprintf(" *** Overlapping region with EFI runtime region %d\n", i);
					}
				}
			}
		}
	}
#endif

	avail_start = first_avail;
	mem_actual = sane_size;

	/* Round the counted memory up to a multiple of 128MB. */
	sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1));

	/* With no explicit limit, cap at KERNEL_MAXMEM. */
	if (maxmem == 0 && sane_size > KERNEL_MAXMEM) {
		maxmem = KERNEL_MAXMEM;
		printf("Physical memory %lld bytes capped at %dGB\n",
		       sane_size, (uint32_t) (KERNEL_MAXMEM/GB));
	}

	/*
	 * Apply the limit: reduce avail_remaining by the discarded page
	 * count, then walk the regions consuming pages until that budget is
	 * used up.  The region where it runs out is truncated and all later
	 * regions are dropped from the table.
	 */
	if ( (maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) {
		ppnum_t discarded_pages = (ppnum_t)((sane_size - maxmem) >> I386_PGSHIFT);
		ppnum_t highest_pn = 0;
		ppnum_t cur_alloc = 0;
		uint64_t pages_to_use;
		unsigned cur_region = 0;

		sane_size = maxmem;

		if (avail_remaining > discarded_pages)
			avail_remaining -= discarded_pages;
		else
			avail_remaining = 0;

		pages_to_use = avail_remaining;

		while (cur_region < pmap_memory_region_count && pages_to_use) {
			for (cur_alloc = pmap_memory_regions[cur_region].alloc;
			     cur_alloc < pmap_memory_regions[cur_region].end && pages_to_use;
			     cur_alloc++) {
				if (cur_alloc > highest_pn)
					highest_pn = cur_alloc;
				pages_to_use--;
			}
			if (pages_to_use == 0)
				pmap_memory_regions[cur_region].end = cur_alloc;
			cur_region++;
		}
		pmap_memory_region_count = cur_region;

		avail_end = i386_ptob(highest_pn + 1);
	}

	/* mem_size is limited to 2GB; max_mem carries the full value. */
	if (sane_size > (FOURGIG >> 1))
		mem_size = (vm_size_t)(FOURGIG >> 1);
	else
		mem_size = (vm_size_t)sane_size;
	max_mem = sane_size;

	kprintf("Physical memory %llu MB\n", sane_size/MB);

	/* "max_valid_dma_addr" is given in MB; the default is 4GB. */
	if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr)))
		max_valid_dma_address = 4 * GB;
	else
		max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB;

	/* "maxbouncepool" is given in MB. */
	if (!PE_parse_boot_argn("maxbouncepool", &maxbouncepoolsize, sizeof (maxbouncepoolsize)))
		maxbouncepoolsize = MAXBOUNCEPOOL;
	else
		maxbouncepoolsize = maxbouncepoolsize * (1024 * 1024);

	/* Any "srv" boot-arg that parses forces srv to 1. */
	if (PE_parse_boot_argn("srv", &srv, sizeof (srv))) {
		srv = 1;
	}

	/* "maxloreserve" is given in MB. */
	if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve)))
		maxloreserve = MAXLORESERVE + bsd_mbuf_cluster_reserve();
	else
		maxloreserve = maxloreserve * (1024 * 1024);

	/*
	 * The bounce pool and low-page pool are only set up when memory
	 * extends to or beyond max_valid_dma_address.
	 */
	if (avail_end >= max_valid_dma_address) {
		if (maxbouncepoolsize)
			reserve_bouncepool(maxbouncepoolsize);

		if (maxloreserve)
			vm_lopage_poolsize = maxloreserve / PAGE_SIZE;
	}

	pmap_bootstrap(0, IA32e);
}
/*
 * pmap_free_pages
 *
 * Returns the current avail_remaining page count, narrowed to
 * unsigned int.
 */
unsigned int
pmap_free_pages(void)
{
	unsigned int pages_left = (unsigned int)avail_remaining;

	return pages_left;
}
#if defined(__LP64__)
/*
 * pmap_next_page_k64
 *
 * When max_mem is at least 32GB, hand out the next page from the last
 * entry of pmap_memory_regions[] while that entry still has pages left
 * (alloc != end).  In every other case defer to pmap_next_page().
 *
 * pn: receives the allocated page number on success.
 * Returns TRUE when a page was stored in *pn, else whatever
 * pmap_next_page() returns.
 */
boolean_t
pmap_next_page_k64( ppnum_t *pn)
{
	if (max_mem < (32*GB))
		return pmap_next_page(pn);

	pmap_memory_region_t *top_region =
		pmap_memory_regions + (pmap_memory_region_count - 1);

	/* Last region exhausted: fall back to the ordinary allocator. */
	if (top_region->alloc == top_region->end)
		return pmap_next_page(pn);

	*pn = top_region->alloc;
	top_region->alloc++;
	avail_remaining--;
	return TRUE;
}
#endif
/*
 * pmap_next_page
 *
 * Hand out the next unallocated physical page, scanning
 * pmap_memory_regions[] from pmap_memory_region_current onward and
 * stepping past entries whose alloc has reached end.
 *
 * pn: receives the allocated page number on success.
 * Returns TRUE when a page was stored in *pn; FALSE when avail_remaining
 * is zero or every remaining region is exhausted.
 */
boolean_t
pmap_next_page(
	ppnum_t *pn)
{
	if (avail_remaining == 0)
		return FALSE;

	for (;
	     pmap_memory_region_current < pmap_memory_region_count;
	     pmap_memory_region_current++) {
		pmap_memory_region_t *region =
			&pmap_memory_regions[pmap_memory_region_current];

		/* Nothing left here; move on to the next region. */
		if (region->alloc == region->end)
			continue;

		*pn = region->alloc++;
		avail_remaining--;
		return TRUE;
	}
	return FALSE;
}
/*
 * pmap_valid_page
 *
 * Returns TRUE when page number pn lies within [base, end] of any entry
 * in pmap_memory_regions[], FALSE otherwise.
 */
boolean_t
pmap_valid_page(
	ppnum_t pn)
{
	unsigned int idx = 0;

	while (idx < pmap_memory_region_count) {
		const pmap_memory_region_t *region = &pmap_memory_regions[idx];

		if ( (region->base <= pn) && (pn <= region->end) )
			return TRUE;
		idx++;
	}
	return FALSE;
}
/*
 * reserve_bouncepool
 *
 * Carve bounce_pool_wanted bytes out of the region table.  Among the
 * regions with at least that many unallocated pages (end - alloc), the
 * one with the lowest alloc page is chosen.  Its alloc pointer is
 * advanced past the pool, avail_remaining is reduced, and
 * bounce_pool_base / bounce_pool_size are recorded.  If no region is
 * large enough, nothing is changed.
 */
static void
reserve_bouncepool(uint32_t bounce_pool_wanted)
{
	pmap_memory_region_t *best = NULL;
	unsigned int pages_needed = bounce_pool_wanted / PAGE_SIZE;
	unsigned int idx;

	for (idx = 0; idx < pmap_memory_region_count; idx++) {
		pmap_memory_region_t *candidate = &pmap_memory_regions[idx];

		/* Too small to hold the pool. */
		if ( (candidate->end - candidate->alloc) < pages_needed )
			continue;

		if ( (best == NULL) || (candidate->alloc < best->alloc) )
			best = candidate;
	}

	if (best == NULL)
		return;

	bounce_pool_base = best->alloc * PAGE_SIZE;
	bounce_pool_size = bounce_pool_wanted;

	best->alloc += pages_needed;
	avail_remaining -= pages_needed;
}
/*
 * pmap_lowmem_finalize
 *
 * Final adjustments to the kernel's low-memory mappings:
 *   - frees the physical range described by pmap_memory_regions[0];
 *   - decides whether kernel text is mapped with 4K or large pages and
 *     whether it is write-protected (boot-args "-kernel_text_ps_4K" and
 *     "wpkernel");
 *   - rewrites the text page-table entries accordingly;
 *   - maps lowGlo at VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000;
 *   - flushes the TLB.
 *
 * Panics if IdlePML4 is not at the expected physical page or if no PTE
 * exists for the lowGlo virtual address.
 */
void
pmap_lowmem_finalize(void)
{
spl_t spl;
int i;
/* Sanity check: the kernel must be loaded at the expected physical page. */
if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) !=
I386_KERNEL_IMAGE_BASE_PAGE)
panic("pmap_lowmem_finalize() unexpected kernel base address");
/* The loop bound is 1, so only pmap_memory_regions[0] is released. */
for (i = 0;
i < 1;
i++) {
vm_offset_t pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
vm_offset_t pend = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
DBG("ml_static_mfree(%p,%p) for pmap region %d\n",
(void *) ml_static_ptovirt(pbase),
(void *) (pend - pbase), i);
ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
}
/*
 * Large pages are only considered when both stext and sdata are aligned
 * to I386_LPGMASK.  The "-kernel_text_ps_4K" boot-arg is parsed directly
 * into kernel_text_ps_4K.
 * NOTE(review): presumably a present flag stores a non-zero value, which
 * re-selects 4K mapping -- confirm against PE_parse_boot_argn().
 */
if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
kprintf("Kernel text is 2MB aligned");
kernel_text_ps_4K = FALSE;
if (PE_parse_boot_argn("-kernel_text_ps_4K",
&kernel_text_ps_4K,
sizeof (kernel_text_ps_4K)))
kprintf(" but will be mapped with 4K pages\n");
else
kprintf(" and will be mapped with 2M pages\n");
}
/* "wpkernel" boot-arg may override the default (TRUE). */
(void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel));
if (wpkernel)
kprintf("Kernel text %p-%p to be write-protected\n",
(void *) stext, (void *) etext);
/* Page-table edits below run between splhigh() and splx(). */
spl = splhigh();
/* 4K mapping: clear the RW bit on every existing text PTE. */
if (kernel_text_ps_4K && wpkernel) {
vm_offset_t myva;
for (myva = stext; myva < etext; myva += PAGE_SIZE) {
pt_entry_t *ptep;
ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
if (ptep)
pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
}
}
/*
 * Large-page mapping: release the padding on either side of the text,
 * then replace each page-table mapping in [stext, sdata) with a single
 * PDE that has INTEL_PTE_PS set, and free the page table it replaces.
 */
if (!kernel_text_ps_4K) {
vm_offset_t myva;
DBG("ml_static_mfree(%p,%p) for padding below text\n",
(void *) eHIB, (void *) (stext - eHIB));
ml_static_mfree(eHIB, stext - eHIB);
DBG("ml_static_mfree(%p,%p) for padding above text\n",
(void *) etext, (void *) (sdata - etext));
ml_static_mfree(etext, sdata - etext);
for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
pt_entry_t *ptep;
vm_offset_t pte_phys;
pt_entry_t *pdep;
pt_entry_t pde;
pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
DBG("myva: %p pdep: %p ptep: %p\n",
(void *) myva, (void *) pdep, (void *) ptep);
/* NOTE(review): ptep and pdep are dereferenced without a NULL check here, unlike the 4K loop above. */
if ((*ptep & INTEL_PTE_VALID) == 0)
continue;
/* Physical frame taken from the first PTE of this large-page span. */
pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
/* Keep the PDE bits selected by PTMASK, mark it large-page, point it at the frame. */
pde = *pdep & PTMASK;
pde |= INTEL_PTE_PS;
pde |= pte_phys;
if (wpkernel)
pde &= ~INTEL_PTE_RW;
DBG("pmap_store_pte(%p,0x%llx)\n",
(void *)pdep, pde);
pmap_store_pte(pdep, pde);
/* Address of the page-table page being freed, computed from KPTphys. */
vm_offset_t vm_ptep = (vm_offset_t) KPTphys
+ (pte_phys >> PTPGSHIFT);
DBG("ml_static_mfree(%p,0x%x) for pte\n",
(void *) vm_ptep, PAGE_SIZE);
ml_static_mfree(vm_ptep, PAGE_SIZE);
}
pmap_kernel_text_ps = I386_LPGBYTES;
}
#if defined(__i386__)
/* On i386 only: invalidate the mapping for virtual address 0. */
pmap_store_pte(pmap_pte(kernel_pmap, 0), INTEL_PTE_INVALID);
#endif
/*
 * Point the PTE for VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000 at the
 * page-aligned lowGlo structure, as a wired, valid, writable mapping
 * with REF and MOD set.
 */
pt_entry_t *pte = NULL;
if (0 == (pte = pmap_pte(kernel_pmap,
VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000)))
panic("lowmem pte");
assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
| INTEL_PTE_REF
| INTEL_PTE_MOD
| INTEL_PTE_WIRED
| INTEL_PTE_VALID
| INTEL_PTE_RW);
splx(spl);
/* Make the page-table changes above visible. */
flush_tlb();
}