fast_apply_bench.c [plain text]
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
static inline uint64_t
rdtsc(void)
{
uint32_t lo, hi;
asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}
__attribute__((noinline)) void
apply_p(void (^b)(size_t), size_t offset, size_t count)
{
abort();
}
size_t total_active_cpus = 8;
__attribute__((noinline)) void
apply(void (^b)(size_t), size_t offset, size_t count)
{
const size_t too_long = 100000;
const size_t laps = 16;
uint64_t delta, tmp, now;
size_t i;
if (total_active_cpus == 1) {
for (i = 0; i < count; i++) {
b(offset + i);
}
return;
}
now = mach_absolute_time();
for (i = 0; i < count; i++) {
b(offset + i);
if (i % laps) {
continue;
}
tmp = mach_absolute_time();
delta = tmp - now;
now = tmp;
if (delta > (too_long * laps) || (i == 0 && delta > too_long)) {
apply_p(b, offset + i + 1, count - (i + 1));
return;
}
}
}
int
main(void)
{
void (^b)(size_t) = ^(size_t index) {
asm volatile("");
};
const size_t laps = 10000000;
mach_timebase_info_data_t tbi;
kern_return_t kr;
long double dd;
uint64_t s, e;
size_t i;
kr = mach_timebase_info(&tbi);
assert(kr == 0);
assert(tbi.numer == tbi.denom);
s = mach_absolute_time();
for (i = 0; i < laps; i++) {
b(i);
}
e = mach_absolute_time();
dd = e - s;
dd /= laps;
printf("direct:\t%Lf ns\n", dd);
s = mach_absolute_time();
apply(b, 0, laps);
e = mach_absolute_time();
dd = e - s;
dd /= laps;
printf("apply:\t%Lf ns\n", dd);
return 0;
}