/* measure_tlbs.c -- time memory access patterns with and without 2MB superpages */


#include <inttypes.h>
#include <setjmp.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <mach/mach.h>
#include <mach/mach_vm.h>

/* Superpage geometry: 2MB superpages. */
#define SUPERPAGE_SIZE (2*1024*1024)
/* All bits set except the in-superpage offset bits. */
#define SUPERPAGE_MASK (-SUPERPAGE_SIZE)
/*
 * Round a byte count up to the next multiple of SUPERPAGE_SIZE.
 * The argument is parenthesized so that expression arguments
 * (e.g. `c ? x : y`) are evaluated as a unit before the addition.
 */
#define SUPERPAGE_ROUND_UP(a) (((a) + (SUPERPAGE_SIZE - 1)) & SUPERPAGE_MASK)

/* Benchmark sweep: working-set sizes from START to MAX (exclusive) in STEP increments. */
#define RUNS0 100000    /* passes over the buffer in mode 0 */
#define STEP 4 /* KB */
#define START STEP
#define MAX (1024*1024) /* KB */

/* Per-mode repetition counts, derived from RUNS0. */
#define RUNS1 RUNS0
#define RUNS2 (RUNS0/20)

/* Parameters of the pseudo-random access pattern used by mode 2. */
#define PRIME 15485863
#define NODE_SIZE 128           /* bytes per node */
#define NODE_ACCESSES 16        /* accesses per node */

/*
 * Time one access pattern over a freshly allocated buffer.
 *
 * superpages: TRUE to request 2MB superpage backing from mach_vm_allocate(),
 *             FALSE to request no superpages.
 * mode:       0 = one byte per PAGE_SIZE stride, 1 = every byte,
 *             2 = pseudo-random NODE_SIZE-byte nodes (NODE_ACCESSES bytes each).
 *             Any other value times an empty region.
 * write:      non-zero to store into the buffer, zero to load from it.
 * kb:         working-set size; the buffer covers (kb / 4) * PAGE_SIZE bytes.
 *
 * Returns the elapsed clock() ticks of the timed region divided by the number
 * of pages (kb / 4), or 0 if kb < 4 or the allocation fails.
 *
 * NOTE(review): `sum` only exists to give the loads a consumer; it is never
 * read outside this function, so confirm the build's optimization level does
 * not elide the read loops.
 */
clock_t
testt(boolean_t superpages, int mode, int write, int kb)
{
	static int sum;
	const int pages = kb / 4;
	mach_vm_address_t addr = 0;
	mach_vm_size_t bytes, size;
	kern_return_t kr;
	unsigned int npages, run, p, i;
	size_t off;
	char *data;
	clock_t begin, end;

	/* Nothing to measure, and the per-page division below would be by zero. */
	if (pages <= 0) {
		return 0;
	}
	npages = (unsigned int)pages;

	bytes = (mach_vm_size_t)npages * PAGE_SIZE;
	size = SUPERPAGE_ROUND_UP(bytes); /* allocate full superpages */

	kr = mach_vm_allocate(mach_task_self(), &addr, size,
	    VM_FLAGS_ANYWHERE |
	    (superpages ? VM_FLAGS_SUPERPAGE_SIZE_2MB : VM_FLAGS_SUPERPAGE_NONE));
	if (kr != KERN_SUCCESS || !addr) {
		return 0;
	}

	data = (char *)(uintptr_t)addr;

	/* touch every base page to make sure everything is mapped and zero-filled */
	for (p = 0; p < npages; p++) {
		sum += data[(size_t)p * PAGE_SIZE];
	}

	begin = clock(); /* start timing */
	switch (mode) {
	case 0:         /* one byte every PAGE_SIZE bytes */
		if (write) {
			for (run = 0; run < RUNS0; run++) {
				for (p = 0; p < npages; p++) {
					data[(size_t)p * PAGE_SIZE] = (char)(run & 0xFF);
				}
			}
		} else {
			for (run = 0; run < RUNS0; run++) {
				for (p = 0; p < npages; p++) {
					sum += data[(size_t)p * PAGE_SIZE];
				}
			}
		}
		break;
	case 1:         /* every byte */
		if (write) {
			for (run = 0; run < RUNS1 / PAGE_SIZE; run++) {
				/* size_t index: cannot wrap before reaching `bytes` */
				for (off = 0; off < bytes; off++) {
					data[off] = (char)(run & 0xFF);
				}
			}
		} else {
			for (run = 0; run < RUNS1 / PAGE_SIZE; run++) {
				for (off = 0; off < bytes; off++) {
					sum += data[off];
				}
			}
		}
		break;
	case 2: {       /* random */
		const mach_vm_size_t nodes = bytes / NODE_SIZE;
		const unsigned int total = (unsigned int)RUNS2 * npages;
		char *node;

		/*
		 * p advances by PRIME with unsigned int wraparound; reducing it
		 * modulo the node count selects the next node to touch.
		 */
		p = 0;
		if (write) {
			for (run = 0; run < total; run++) {
				p += PRIME;
				node = data + (size_t)(p % nodes) * NODE_SIZE;
				for (i = 0; i < NODE_ACCESSES; i++) {
					node[i] = (char)(run & 0xFF);
				}
			}
		} else {
			for (run = 0; run < total; run++) {
				p += PRIME;
				node = data + (size_t)(p % nodes) * NODE_SIZE;
				for (i = 0; i < NODE_ACCESSES; i++) {
					sum += node[i];
				}
			}
		}
		break;
	}
	default:        /* unknown mode: nothing is timed */
		break;
	}
	end = clock(); /* stop timing */

	(void)mach_vm_deallocate(mach_task_self(), addr, size);

	/* Keep the full clock_t width; normalize to ticks per page. */
	return (end - begin) / (clock_t)npages;
}

/*
 * Sweep the working-set size from START to MAX (exclusive) KB in STEP
 * increments and print one semicolon-separated row per size: the size in KB
 * followed, for each mode 0..2, by the testt() result for
 * read/superpage, read/base page, write/superpage, write/base page.
 * Each group of four results is flushed as soon as it is available.
 */
int
main(int argc, char **argv)
{
	int kb;
	int mode;
	uint64_t time1, time2, time3, time4;

	(void)argc;
	(void)argv;

	printf("; m0 r s; m0 r b; m0 w s; m0 w b; m1 r s; m1 r b; m1 w s; m1 w b; m2 r s; m2 r b; m2 w s; m2 w b\n");
	for (kb = START; kb < MAX; kb += STEP) {
		printf("%d", kb);
		for (mode = 0; mode <= 2; mode++) {
			time1 = testt(TRUE, mode, 0, kb);       // read super
			time2 = testt(FALSE, mode, 0, kb);      // read base
			time3 = testt(TRUE, mode, 1, kb);       // write super
			time4 = testt(FALSE, mode, 1, kb);      // write base
			/* PRIu64 matches the uint64_t arguments exactly. */
			printf("; %" PRIu64 "; %" PRIu64 "; %" PRIu64 "; %" PRIu64,
			    time1, time2, time3, time4);
			fflush(stdout);
		}
		printf("\n");
	}

	return 0;
}