#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93";
#endif
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
#include <stdlib.h>
#include <pthread.h>
#include <dispatch/dispatch.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <libkern/OSAtomic.h>
#include <sys/mman.h>
#include <errno.h>
#define __APPLE_API_PRIVATE
#include <machine/cpu_capabilities.h>
/*
 * Comparison callback type.  When built as the psort_r variant the
 * callback receives the caller's thunk as its first argument; otherwise
 * it takes only the two elements being compared.
 */
#ifdef I_AM_PSORT_R
typedef int cmp_t(void *, const void *, const void *);
#else
typedef int cmp_t(const void *, const void *);
#endif
/*
 * Forward declarations of the helpers that are forced inline.  In the
 * psort_b variant the comparator is passed as a block (cmp_t ^) instead
 * of a function pointer (cmp_t *).
 */
#ifdef I_AM_PSORT_B
static inline char *med3(char *, char *, char *, cmp_t ^, void *) __attribute__((always_inline));
#else
static inline char *med3(char *, char *, char *, cmp_t *, void *) __attribute__((always_inline));
#endif
static inline void swapfunc(char *, char *, int, int) __attribute__((always_inline));
/*
 * Smaller of two values.  Both arguments and the whole expansion are
 * parenthesized so the macro can be embedded in a larger expression.
 * Each argument may be evaluated twice, so do not pass expressions with
 * side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
/*
 * Number of union args slots that fit in one PAGESIZE-byte page after the
 * struct page header.
 */
#define NARGS ((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
/* Size in bytes of each anonymous mapping created by getargs(). */
#define PAGESIZE 4096
/* Arrays with fewer elements than this are passed straight to the serial qsort variant. */
#define PARALLEL_MIN_SIZE 2000
struct shared;
/*
 * One unit of work for _psort_parallel().  While a slot sits on the free
 * list only `next` is used; once handed out by getargs() the same storage
 * carries the parameters of a sub-sort.
 */
union args {
/* link to the next free slot */
union args *next;
struct {
/* state common to every task of this sort */
struct shared *shared;
/* start of the sub-array to sort */
void *a;
/* number of elements in the sub-array */
size_t n;
/* remaining partitioning depth before _psort() falls back to heapsort */
int depth_limit;
} ;
};
/*
 * Header of one PAGESIZE-byte mapping created by getargs().  Everything
 * after the header is carved into NARGS argument slots.  The slots are
 * declared as a C99 flexible array member rather than a zero-length
 * array; the layout and offsetof(struct page, args) are unchanged.
 */
struct page {
	struct page *next;	/* next page on shared->pagelist */
	union args args[];	/* argument slots filling the rest of the page */
};
/*
 * State shared by all tasks of one parallel sort.  It lives on the stack
 * of the public entry point for the duration of the sort.
 */
struct shared {
/* name of the entry point, used in the abort message when getargs() fails */
char *who;
/* free argument slots; protected by sharedlock */
union args *freelist;
/* every page mapped by getargs(), unmapped when the sort finishes */
struct page *pagelist;
#ifdef I_AM_PSORT_R
/* caller-supplied context handed to the comparator */
void *thunk;
#endif
/* comparator: a block in the psort_b variant, a function pointer otherwise */
#ifdef I_AM_PSORT_B
cmp_t ^cmp;
#else
cmp_t *cmp;
#endif
/* element size in bytes */
size_t es;
/* partitions with more elements than this are dispatched asynchronously */
size_t turnoff;
/* dispatch queue the asynchronous sub-sorts are submitted to */
dispatch_queue_t queue;
/* cond/mutex: used to wake the entry point once count drops to zero */
pthread_cond_t cond;
pthread_mutex_t mutex;
/* guards freelist and pagelist */
OSSpinLock sharedlock;
/* number of tasks started and not yet finished; updated atomically */
int count;
};
/*
 * Take one argument slot off shared->freelist, mapping and carving up a
 * fresh page first if the list is empty.
 *
 * Returns the slot, or NULL if mmap() fails; in that case errno holds
 * mmap's error and the spinlock has been released.
 */
static union args *
getargs(struct shared *shared)
{
	union args *args;

	OSSpinLockLock(&shared->sharedlock);
	if(!shared->freelist) {
		struct page *page;
		union args *prev;
		int i;

		page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
		/* mmap() reports failure with MAP_FAILED, never with NULL. */
		if(page == (struct page *)MAP_FAILED) {
			int saved_errno = errno;

			/* Do not leave the lock held on the error path. */
			OSSpinLockUnlock(&shared->sharedlock);
			/* Callers format strerror(errno); keep mmap's value. */
			errno = saved_errno;
			return NULL;
		}
		/* Remember the page so it can be unmapped after the sort. */
		page->next = shared->pagelist;
		shared->pagelist = page;
		/* Thread every slot of the new page onto a list. */
		prev = NULL;
		for(args = page->args, i = NARGS; i > 0; args++, i--) {
			args->next = prev;
			prev = args;
		}
		shared->freelist = prev;
	}
	/* Pop the head of the free list. */
	args = shared->freelist;
	shared->freelist = args->next;
	OSSpinLockUnlock(&shared->sharedlock);
	return args;
}
static void
returnargs(struct shared *shared, union args *args)
{
OSSpinLockLock(&shared->sharedlock);
args->next = shared->freelist;
shared->freelist = args;
OSSpinLockUnlock(&shared->sharedlock);
}
/*
 * Exchange n bytes between parmi and parmj, TYPE-sized units at a time.
 * The loop body runs at least once, so n must be at least sizeof(TYPE).
 * The expansion is a bare braced block, so it is used without a trailing
 * semicolon.
 */
#define swapcode(TYPE, parmi, parmj, n) {		\
	long i = (n) / sizeof (TYPE);			\
	TYPE *pi = (TYPE *) (parmi);			\
	TYPE *pj = (TYPE *) (parmj);			\
	do {						\
		TYPE t = *pi;				\
		*pi++ = *pj;				\
		*pj++ = t;				\
	} while (--i > 0);				\
}
/*
 * Set the caller's local `swaptype` for array base a and element size es:
 *   2 - a is not long-aligned or es is not a multiple of sizeof(long):
 *       swap byte by byte;
 *   0 - elements are exactly one aligned long: swap with a single move;
 *   1 - elements are several aligned longs: swap long by long.
 * The address is reduced through uintptr_t; subtracting a null pointer
 * from a is undefined behaviour.
 */
#define SWAPINIT(a, es) swaptype = ((uintptr_t)(a) % sizeof(long) ||	\
	(es) % sizeof(long)) ? 2 : ((es) == sizeof(long) ? 0 : 1)
/*
 * Exchange the n bytes at a with the n bytes at b.  swaptype <= 1 copies
 * in long-sized units, anything else copies byte by byte.
 */
static inline void
swapfunc(char *a, char *b, int n, int swaptype)
{
	if (swaptype <= 1) {
		swapcode(long, a, b, n)
	} else {
		swapcode(char, a, b, n)
	}
}
/*
 * Exchange the elements at a and b.  Uses the `swaptype` and `es` locals
 * of the calling function: swaptype 0 moves a single long, anything else
 * goes through swapfunc().  Wrapped in do { } while (0) so the macro is a
 * single statement and cannot capture a following `else`.
 */
#define swap(a, b) do {						\
	if (swaptype == 0) {					\
		long t = *(long *)(a);				\
		*(long *)(a) = *(long *)(b);			\
		*(long *)(b) = t;				\
	} else							\
		swapfunc((a), (b), es, swaptype);		\
} while (0)
/*
 * Exchange n bytes starting at a with n bytes starting at b; does nothing
 * when n is not positive.  Uses the caller's `swaptype`.
 */
#define vecswap(a, b, n) do {					\
	if ((n) > 0)						\
		swapfunc((a), (b), (n), swaptype);		\
} while (0)
/*
 * Invoke the comparator named `cmp` in the calling scope.  The thunk
 * argument t is forwarded only in the psort_r variant and is dropped
 * (not evaluated) otherwise.
 */
#ifdef I_AM_PSORT_R
#define CMP(t, x, y) (cmp((t), (x), (y)))
#else
#define CMP(t, x, y) (cmp((x), (y)))
#endif
/*
 * Return whichever of a, b, c points at the median element according to
 * the comparator.  thunk is forwarded to the comparator only in the
 * psort_r variant.
 */
static inline char *
med3(char *a, char *b, char *c,
#ifdef I_AM_PSORT_B
    cmp_t ^cmp,
#else
    cmp_t *cmp,
#endif
    void *thunk
#ifndef I_AM_PSORT_R
    __unused
#endif
    )
{
	if (CMP(thunk, a, b) < 0) {
		/* a < b: the median is b unless c falls below it. */
		if (CMP(thunk, b, c) < 0)
			return b;
		if (CMP(thunk, a, c) < 0)
			return c;
		return a;
	}
	/* a >= b: the median is b unless c falls above it. */
	if (CMP(thunk, b, c) > 0)
		return b;
	if (CMP(thunk, a, c) < 0)
		return a;
	return c;
}
/*
 * Initial depth budget for a sort of x elements: twice (fls(x) - 1),
 * using flsl() on LP64 and fls() elsewhere.  _psort() decrements the
 * budget on every pass and switches to heapsort once it is used up.
 */
#ifdef __LP64__
#define DEPTH(x) (2 * (flsl((long)(x)) - 1))
#else
#define DEPTH(x) (2 * (fls((int)(x)) - 1))
#endif
/* Heapsort taking a thunk; called by _psort() in the psort_r variant. */
#ifdef I_AM_PSORT_R
int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
#endif
/* Task entry point; declared early because _psort() hands it to dispatch_async_f(). */
static void _psort_parallel(void *x);
/*
 * Quicksort the n elements of size es starting at a.
 *
 * Each pass picks a pivot, partitions the range into "less", "equal" and
 * "greater" regions, sorts the left region (asynchronously when `shared`
 * is non-NULL and the region has more than shared->turnoff elements,
 * otherwise by a direct recursive call with shared == NULL) and then
 * loops on the right region.
 *
 * depth_limit is decremented on every pass; once it is exhausted the
 * remaining range is handed to the matching heapsort variant.
 */
static void
_psort(void *a, size_t n, size_t es,
#ifdef I_AM_PSORT_R
void *thunk,
#else
/*
 * No thunk parameter in this variant: `thunk` becomes a macro expanding
 * to NULL so the calls below can pass it uniformly.
 */
#define thunk NULL
#endif
#ifdef I_AM_PSORT_B
cmp_t ^cmp,
#else
cmp_t *cmp,
#endif
int depth_limit, struct shared *shared)
{
char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
size_t d, r;
int cmp_result;
int swaptype, swap_cnt;
loop:
/* Depth budget used up: finish this range with heapsort. */
if (depth_limit-- <= 0) {
#ifdef I_AM_PSORT_B
heapsort_b(a, n, es, cmp);
#elif defined(I_AM_PSORT_R)
__heapsort_r(a, n, es, thunk, cmp);
#else
heapsort(a, n, es, cmp);
#endif
return;
}
SWAPINIT(a, es);
swap_cnt = 0;
/* Fewer than 7 elements: plain insertion sort. */
if (n < 7) {
for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
for (pl = pm;
pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
pl -= es)
swap(pl, pl - es);
return;
}
/*
 * Pivot choice: the middle element for n == 7, the median of first,
 * middle and last for larger n, and for n > 40 the median of three such
 * medians taken d bytes apart.
 */
pm = (char *)a + (n / 2) * es;
if (n > 7) {
pl = a;
pn = (char *)a + (n - 1) * es;
if (n > 40) {
d = (n / 8) * es;
pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
pm = med3(pm - d, pm, pm + d, cmp, thunk);
pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
}
pm = med3(pl, pm, pn, cmp, thunk);
}
/* Move the pivot to the front; every comparison below is against *a. */
swap(a, pm);
pa = pb = (char *)a + es;
pc = pd = (char *)a + (n - 1) * es;
/*
 * Partition.  pb scans up and pc scans down; elements equal to the pivot
 * are parked at the two ends (below pa and above pd).  swap_cnt records
 * whether this loop moved anything.
 */
for (;;) {
while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
if (cmp_result == 0) {
swap_cnt = 1;
swap(pa, pb);
pa += es;
}
pb += es;
}
while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
if (cmp_result == 0) {
swap_cnt = 1;
swap(pc, pd);
pd -= es;
}
pc -= es;
}
if (pb > pc)
break;
swap(pb, pc);
swap_cnt = 1;
pb += es;
pc -= es;
}
/* Swap the parked equal elements from both ends into the middle. */
pn = (char *)a + n * es;
r = min(pa - (char *)a, pb - pa);
vecswap(a, pb - r, r);
r = min(pd - pc, pn - pd - es);
vecswap(pb, pn - r, r);
/*
 * The partition loop performed no swap: try an insertion sort, but give
 * up and continue with quicksort after more than 1 + n / 4 swaps.
 */
if (swap_cnt == 0) {
r = 1 + n / 4;
for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
for (pl = pm;
pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
pl -= es) {
swap(pl, pl - es);
if (++swap_cnt > r) goto nevermind;
}
return;
}
nevermind:
/* Left ("less than") region: pb - pa bytes starting at a. */
if ((r = pb - pa) > es) {
r /= es;
if (shared && r > shared->turnoff) {
/* Big enough: queue it as a separate task and count it as outstanding. */
union args *args = getargs(shared);
if (args == NULL)
LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
args->shared = shared;
args->a = a;
args->n = r;
args->depth_limit = depth_limit;
OSAtomicIncrement32(&shared->count);
dispatch_async_f(shared->queue, args, _psort_parallel);
} else {
/* Sort it in this thread; shared == NULL keeps the whole subtree serial. */
#ifdef I_AM_PSORT_R
_psort(a, r, es, thunk, cmp, depth_limit, NULL);
#else
_psort(a, r, es, cmp, depth_limit, NULL);
#endif
}
}
/* Right ("greater than") region: loop instead of making a second recursive call. */
if ((r = pd - pc) > es) {
a = pn - r;
n = r / es;
goto loop;
}
}
/*
 * Task body: sort the range described by the argument slot x, return the
 * slot to the free list and mark the task as finished.
 *
 * x is a union args * obtained from getargs().  The function is run both
 * through dispatch_async_f() and directly by the sort entry point.
 *
 * The outstanding-task count is decremented while holding shared->mutex.
 * The entry point tests the count under the same mutex and then destroys
 * the mutex and condition variable, which live on its stack.  If the
 * decrement happened before taking the lock, the entry point could see a
 * count of zero and tear everything down while this task was still about
 * to lock the mutex.
 */
static void
_psort_parallel(void *x)
{
	union args *args = (union args *)x;
	struct shared *shared = args->shared;

	_psort(args->a, args->n, shared->es,
#ifdef I_AM_PSORT_R
	    shared->thunk,
#endif
	    shared->cmp, args->depth_limit, shared);
	returnargs(shared, args);

	pthread_mutex_lock(&shared->mutex);
	/* Last task to finish wakes the waiting entry point. */
	if(OSAtomicDecrement32(&shared->count) <= 0)
		pthread_cond_signal(&shared->cond);
	/* shared must not be touched after this unlock. */
	pthread_mutex_unlock(&shared->mutex);
}
/*
 * Cheap approximation of the integer square root of x: start from a power
 * of two whose exponent is half of flsl(x) and average it once with
 * x divided by that guess.
 */
static size_t
isqrt(size_t x)
{
	const int half_bits = flsl(x) / 2;
	const size_t guess = 1L << half_bits;

	return (guess + x / guess) / 2;
}
/*
 * Public entry point (psort, psort_r or psort_b depending on the build
 * variant).  Sorts the n elements of size es starting at a.
 *
 * The parallel path is taken when n is at least PARALLEL_MIN_SIZE, more
 * than one CPU is reported and a first argument slot can be obtained.
 * In every other case the call is forwarded to the matching serial qsort
 * variant.
 */
void
#ifdef I_AM_PSORT_R
psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
#elif defined(I_AM_PSORT_B)
psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
#else
psort(void *a, size_t n, size_t es, cmp_t *cmp)
#endif
{
	if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) {
		struct shared shared;
		union args *root;

		memset(&shared, 0, sizeof(shared));
		shared.sharedlock = OS_SPINLOCK_INIT;
		root = getargs(&shared);
		if (root != NULL) {
			struct page *page, *following;

#ifdef I_AM_PSORT_R
			shared.who = "psort_r";
			shared.thunk = thunk;
#elif defined(I_AM_PSORT_B)
			shared.who = "psort_b";
#else
			shared.who = "psort";
#endif
			shared.cmp = cmp;
			shared.es = es;
			/* Partitions no larger than this are sorted in the thread that found them. */
			shared.turnoff = isqrt(n);
			shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
			shared.cond = (pthread_cond_t)PTHREAD_COND_INITIALIZER;
			shared.mutex = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;

			/* The root task covers the whole array. */
			root->shared = &shared;
			root->a = a;
			root->n = n;
			root->depth_limit = DEPTH(n);

			/* Run the root task in this thread, counted like any other. */
			OSAtomicIncrement32(&shared.count);
			_psort_parallel(root);

			/* Block until every dispatched task has finished. */
			pthread_mutex_lock(&shared.mutex);
			while (shared.count > 0)
				pthread_cond_wait(&shared.cond, &shared.mutex);
			pthread_mutex_unlock(&shared.mutex);

			pthread_mutex_destroy(&shared.mutex);
			pthread_cond_destroy(&shared.cond);

			/* Release every page of argument slots. */
			page = shared.pagelist;
			while (page) {
				following = page->next;
				munmap(page, PAGESIZE);
				page = following;
			}
			return;
		}
	}
	/* Serial fallback. */
#ifdef I_AM_PSORT_R
	qsort_r(a, n, es, thunk, cmp);
#elif defined(I_AM_PSORT_B)
	qsort_b(a, n, es, cmp);
#else
	qsort(a, n, es, cmp);
#endif
}