/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#pragma once

#include <arm64/proc_reg.h>
#include <machine/atomic.h>

#define tlbi_addr(x) ((((x) >> 12) & TLBI_ADDR_MASK) << TLBI_ADDR_SHIFT)
#define tlbi_asid(x) (((uintptr_t)(x) & TLBI_ASID_MASK) << TLBI_ASID_SHIFT)
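
/*
 * Illustrative note (a sketch, not part of the original interface): tlbi_addr()
 * extracts the 4KB page number of a VA into the address field of a TLBI
 * operand, and tlbi_asid() places an ASID in the high 16 bits.  For the
 * per-ASID routines below, the two compose as:
 *
 *     uint64_t operand = tlbi_asid(asid) | tlbi_addr(va);
 *     flush_mmu_tlb_entry(operand);
 */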

#if __ARM_KERNEL_PROTECT__
/*
 * __ARM_KERNEL_PROTECT__ adds two complications to TLB management:
 *
 * 1. As each pmap has two ASIDs, every TLB operation that targets an ASID must
 *   target both ASIDs for the pmap that owns the target ASID.
 *
 * 2. Any TLB operation targeting the kernel_pmap ASID (ASID 0) must target all
 *   ASIDs (as kernel_pmap mappings may be referenced while using an ASID that
 *   belongs to another pmap).  We expect these routines to be called with the
 *   EL0 ASID for the target, not the EL1 ASID.
 */
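
/*
 * In the routines below this works out to issuing each per-ASID TLBI twice,
 * once with the low bit of the ASID field cleared (the caller's EL0 ASID) and
 * once with it set (the companion EL1 ASID), roughly:
 *
 *     tlbi <op>, (val & ~(1ULL << TLBI_ASID_SHIFT))
 *     tlbi <op>, (val |  (1ULL << TLBI_ASID_SHIFT))
 *
 * This is a descriptive sketch of the code that follows, not an additional
 * guarantee.
 */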
#endif /* __ARM_KERNEL_PROTECT__ */

static inline void
sync_tlb_flush(void)
{
	__builtin_arm_dsb(DSB_ISH);
	__builtin_arm_isb(ISB_SY);
}

// flush_mmu_tlb: full TLB flush on all cores
static inline void
flush_mmu_tlb_async(void)
{
	asm volatile ("tlbi vmalle1is");
}

static inline void
flush_mmu_tlb(void)
{
	flush_mmu_tlb_async();
	sync_tlb_flush();
}

// flush_core_tlb: full TLB flush on local core only
static inline void
flush_core_tlb_async(void)
{
	asm volatile ("tlbi vmalle1");
}

static inline void
flush_core_tlb(void)
{
	flush_core_tlb_async();
	sync_tlb_flush();
}

// flush_mmu_tlb_allentries_async: flush TLB entries that map a VA range, all ASIDs, all cores
// start and end are in units of 4K pages.
static inline void
flush_mmu_tlb_allentries_async(uint64_t start, uint64_t end)
{
#if __ARM_16K_PG__
	start = start & ~0x3ULL;

	/*
	 * The code below is not necessarily correct.  From an overview of
	 * the client code, the expected contract for TLB flushes is that
	 * we will expand from an "address, length" pair to "start address,
	 * end address" in the course of a TLB flush.  This suggests that
	 * a flush for "X, X+4" is actually only asking for a flush of a
	 * single 16KB page.  At the same time, we'd like to be prepared
	 * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page
	 * number to a 16KB page boundary.  This should deal correctly with
	 * unaligned inputs.
	 *
	 * If our expectations about client behavior are wrong, however, this
	 * will lead to occasional TLB corruption on platforms with 16KB
	 * pages.
	 */
	end = (end + 0x3ULL) & ~0x3ULL;
#endif // __ARM_16K_PG__
	for (; start < end; start += (ARM_PGBYTES / 4096)) {
		asm volatile ("tlbi vaae1is, %0" : : "r"(start));
	}
}

static inline void
flush_mmu_tlb_allentries(uint64_t start, uint64_t end)
{
	flush_mmu_tlb_allentries_async(start, end);
	sync_tlb_flush();
}
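
/*
 * Illustrative usage (a sketch): for a VA range [start_va, end_va) with a
 * 4KB-aligned exclusive end, the bounds are passed as 4KB page numbers:
 *
 *     flush_mmu_tlb_allentries(start_va >> 12, end_va >> 12);
 *
 * On 16KB-page configurations the routine itself rounds these to 16KB
 * boundaries, as described in the comment above.
 */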

// flush_mmu_tlb_entry: flush TLB entries that map a VA and ASID, all cores
// Will also flush global entries that match the VA
static inline void
flush_mmu_tlb_entry_async(uint64_t val)
{
#if __ARM_KERNEL_PROTECT__
	uint64_t asid = val >> TLBI_ASID_SHIFT;
	if (asid == 0) {
		asm volatile ("tlbi vaae1is, %0" : : "r"(val));
		return;
	}
	val = val & ~(1ULL << TLBI_ASID_SHIFT);
	asm volatile ("tlbi vae1is, %0" : : "r"(val));
	val = val | (1ULL << TLBI_ASID_SHIFT);
#endif /* __ARM_KERNEL_PROTECT__ */
	asm volatile ("tlbi vae1is, %0" : : "r"(val));
}

static inline void
flush_mmu_tlb_entry(uint64_t val)
{
	flush_mmu_tlb_entry_async(val);
	sync_tlb_flush();
}
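
/*
 * Illustrative usage (a sketch): the operand combines the ASID and the 4KB
 * page number of the VA using the macros at the top of this file:
 *
 *     flush_mmu_tlb_entry(tlbi_asid(asid) | tlbi_addr(va));
 */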

// flush_mmu_tlb_entries: flush TLB entries that map a VA range and ASID, all cores
// start and end must have the ASID in the high 16 bits, with the VA in units of 4K in the lowest bits
// Will also flush global entries that match the VA range
static inline void
flush_mmu_tlb_entries_async(uint64_t start, uint64_t end)
{
#if __ARM_16K_PG__
	start = start & ~0x3ULL;

	/*
	 * The code below is not necessarily correct.  From an overview of
	 * the client code, the expected contract for TLB flushes is that
	 * we will expand from an "address, length" pair to "start address,
	 * end address" in the course of a TLB flush.  This suggests that
	 * a flush for "X, X+4" is actually only asking for a flush of a
	 * single 16KB page.  At the same time, we'd like to be prepared
	 * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page
	 * number to a 16KB page boundary.  This should deal correctly with
	 * unaligned inputs.
	 *
	 * If our expectations about client behavior are wrong, however, this
	 * will lead to occasional TLB corruption on platforms with 16KB
	 * pages.
	 */
	end = (end + 0x3ULL) & ~0x3ULL;
#endif // __ARM_16K_PG__
#if __ARM_KERNEL_PROTECT__
	uint64_t asid = start >> TLBI_ASID_SHIFT;
	/*
	 * If we are flushing ASID 0, this is a kernel operation.  With this
	 * ASID scheme, this means we should flush all ASIDs.
	 */
	if (asid == 0) {
		for (; start < end; start += (ARM_PGBYTES / 4096)) {
			asm volatile ("tlbi vaae1is, %0" : : "r"(start));
		}
		return;
	}
	start = start | (1ULL << TLBI_ASID_SHIFT);
	end = end | (1ULL << TLBI_ASID_SHIFT);
	for (; start < end; start += (ARM_PGBYTES / 4096)) {
		start = start & ~(1ULL << TLBI_ASID_SHIFT);
		asm volatile ("tlbi vae1is, %0" : : "r"(start));
		start = start | (1ULL << TLBI_ASID_SHIFT);
		asm volatile ("tlbi vae1is, %0" : : "r"(start));
	}
#else
	for (; start < end; start += (ARM_PGBYTES / 4096)) {
		asm volatile ("tlbi vae1is, %0" : : "r"(start));
	}
#endif /* __ARM_KERNEL_PROTECT__ */
}

static inline void
flush_mmu_tlb_entries(uint64_t start, uint64_t end)
{
	flush_mmu_tlb_entries_async(start, end);
	sync_tlb_flush();
}
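
/*
 * Illustrative usage (a sketch, assuming a 4KB-aligned range of size bytes
 * under a single ASID): both bounds carry the ASID in the high 16 bits and
 * the 4KB page number in the low bits:
 *
 *     uint64_t first = tlbi_asid(asid) | tlbi_addr(start_va);
 *     flush_mmu_tlb_entries(first, first + (size >> 12));
 */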

// flush_mmu_tlb_asid: flush all entries that match an ASID, on all cores
// ASID must be in high 16 bits of argument
// Will not flush global entries
static inline void
flush_mmu_tlb_asid_async(uint64_t val)
{
#if __ARM_KERNEL_PROTECT__
	/*
	 * If we are flushing ASID 0, this is a kernel operation.  With this
	 * ASID scheme, this means we should flush all ASIDs.
	 */
	uint64_t asid = val >> TLBI_ASID_SHIFT;
	if (asid == 0) {
		asm volatile ("tlbi vmalle1is");
		return;
	}
	val = val & ~(1ULL << TLBI_ASID_SHIFT);
	asm volatile ("tlbi aside1is, %0" : : "r"(val));
	val = val | (1ULL << TLBI_ASID_SHIFT);
#endif /* __ARM_KERNEL_PROTECT__ */
	asm volatile ("tlbi aside1is, %0" : : "r"(val));
}

static inline void
flush_mmu_tlb_asid(uint64_t val)
{
	flush_mmu_tlb_asid_async(val);
	sync_tlb_flush();
}
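
/*
 * Illustrative usage (a sketch): only the ASID field of the argument is
 * consumed, so it can be formed with the tlbi_asid() macro:
 *
 *     flush_mmu_tlb_asid(tlbi_asid(asid));
 */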

// flush_core_tlb_asid: flush all entries that match an ASID, local core only
// ASID must be in high 16 bits of argument
// Will not flush global entries
static inline void
flush_core_tlb_asid_async(uint64_t val)
{
#if __ARM_KERNEL_PROTECT__
	/*
	 * If we are flushing ASID 0, this is a kernel operation.  With this
	 * ASID scheme, this means we should flush all ASIDs.
	 */
	uint64_t asid = val >> TLBI_ASID_SHIFT;
	if (asid == 0) {
		asm volatile ("tlbi vmalle1");
		return;
	}
	val = val & ~(1ULL << TLBI_ASID_SHIFT);
	asm volatile ("tlbi aside1, %0" : : "r"(val));
	val = val | (1ULL << TLBI_ASID_SHIFT);
#endif /* __ARM_KERNEL_PROTECT__ */
	asm volatile ("tlbi aside1, %0" : : "r"(val));
}

static inline void
flush_core_tlb_asid(uint64_t val)
{
	flush_core_tlb_asid_async(val);
	sync_tlb_flush();
}

#if __ARM_RANGE_TLBI__
#if __ARM_KERNEL_PROTECT__
	#error __ARM_RANGE_TLBI__ + __ARM_KERNEL_PROTECT__ is not currently supported
#endif

#define ARM64_16K_TLB_RANGE_PAGES (1ULL << 21)
#define rtlbi_addr(x) (((x) >> RTLBI_ADDR_SHIFT) & RTLBI_ADDR_MASK)
#define rtlbi_scale(x) ((uint64_t)(x) << RTLBI_SCALE_SHIFT)
#define rtlbi_num(x) ((uint64_t)(x) << RTLBI_NUM_SHIFT)

/**
 * Given the number of pages to invalidate, generate the correct parameter to
 * pass to any of the TLBI by range methods.
 */
static inline uint64_t
generate_rtlbi_param(ppnum_t npages, uint32_t asid, vm_offset_t va)
{
	/**
	 * Per the armv8.4 RTLBI extension spec, the range encoded in the rtlbi register operand is defined by:
	 * BaseADDR <= VA < BaseADDR+((NUM+1)*2^(5*SCALE+1) * Translation_Granule_Size)
	 */
	unsigned order = (sizeof(npages) * 8) - __builtin_clz(npages - 1) - 1;
	unsigned scale = ((order ? order : 1) - 1) / 5;
	unsigned granule = 1 << ((5 * scale) + 1);
	unsigned num = (((npages + granule - 1) & ~(granule - 1)) / granule) - 1;
	return tlbi_asid(asid) | RTLBI_TG | rtlbi_scale(scale) | rtlbi_num(num) | rtlbi_addr(va);
}
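
/*
 * Worked example (illustrative, with hypothetical inputs): for npages == 32,
 * order = floor(log2(31)) = 4, scale = (4 - 1) / 5 = 0, granule = 2, and
 * num = (32 / 2) - 1 = 15, so the encoded range is
 * (NUM + 1) * 2^(5 * SCALE + 1) = 16 * 2 = 32 translation granules, i.e.
 * exactly npages pages.  Note that npages is assumed to be at least 2 here,
 * since __builtin_clz(0) is undefined when npages == 1.
 */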

// flush_mmu_tlb_range: flush TLB entries that map a VA range using a single instruction
// The argument should be encoded according to generate_rtlbi_param().
// Follows the same ASID matching behavior as flush_mmu_tlb_entries()
static inline void
flush_mmu_tlb_range_async(uint64_t val)
{
	asm volatile ("tlbi rvae1is, %0" : : "r"(val));
}

static inline void
flush_mmu_tlb_range(uint64_t val)
{
	flush_mmu_tlb_range_async(val);
	sync_tlb_flush();
}
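
/*
 * Illustrative usage (a sketch): pair this with generate_rtlbi_param(), e.g.
 * to invalidate npages pages starting at va for a given asid:
 *
 *     flush_mmu_tlb_range(generate_rtlbi_param(npages, asid, va));
 */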

// flush_mmu_tlb_allrange: flush TLB entries that map a VA range using a single instruction
// The argument should be encoded according to generate_rtlbi_param().
// Follows the same ASID matching behavior as flush_mmu_tlb_allentries()
static inline void
flush_mmu_tlb_allrange_async(uint64_t val)
{
	asm volatile ("tlbi rvaae1is, %0" : : "r"(val));
}

static inline void
flush_mmu_tlb_allrange(uint64_t val)
{
	flush_mmu_tlb_allrange_async(val);
	sync_tlb_flush();
}

#endif // __ARM_RANGE_TLBI__