#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file_internal.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/vnode_internal.h>
#include <sys/malloc.h>
#include <sys/mount_internal.h>
#include <sys/param.h>
#include <sys/proc_internal.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/aio_kern.h>
#include <sys/sysproto.h>
#include <machine/limits.h>
#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/zalloc.h>
#include <kern/task.h>
#include <kern/sched_prim.h>
#include <vm/vm_map.h>
#include <sys/kdebug.h>
#define AIO_work_queued 1
#define AIO_worker_wake 2
#define AIO_completion_sig 3
#define AIO_completion_cleanup_wait 4
#define AIO_completion_cleanup_wake 5
#define AIO_completion_suspend_wake 6
#define AIO_fsync_delay 7
#define AIO_cancel 10
#define AIO_cancel_async_workq 11
#define AIO_cancel_sync_workq 12
#define AIO_cancel_activeq 13
#define AIO_cancel_doneq 14
#define AIO_fsync 20
#define AIO_read 30
#define AIO_write 40
#define AIO_listio 50
#define AIO_error 60
#define AIO_error_val 61
#define AIO_error_activeq 62
#define AIO_error_workq 63
#define AIO_return 70
#define AIO_return_val 71
#define AIO_return_activeq 72
#define AIO_return_workq 73
#define AIO_exec 80
#define AIO_exit 90
#define AIO_exit_sleep 91
#define AIO_close 100
#define AIO_close_sleep 101
#define AIO_suspend 110
#define AIO_suspend_sleep 111
#define AIO_worker_thread 120
#if 0
#undef KERNEL_DEBUG
#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
#endif
/*
 * aio_anchor_cb - system-wide anchor for async I/O bookkeeping.
 * Holds the two global work queues plus counters for every queue in the
 * system.  All fields are protected by aio_lock (AIO_LOCK / AIO_UNLOCK).
 */
struct aio_anchor_cb
{
int aio_async_workq_count; /* entries currently on aio_async_workq */
int lio_sync_workq_count; /* entries currently on lio_sync_workq */
int aio_active_count; /* in-flight entries, summed over all processes */
int aio_done_count; /* completed entries awaiting aio_return(), all processes */
TAILQ_HEAD( , aio_workq_entry ) aio_async_workq; /* queued requests not yet picked up by a worker */
TAILQ_HEAD( , aio_workq_entry ) lio_sync_workq; /* lio_listio(LIO_WAIT) requests processed synchronously */
};
typedef struct aio_anchor_cb aio_anchor_cb;
#define AIO_SUSPEND_SLEEP_CHAN p_estcpu
#define AIO_CLEANUP_SLEEP_CHAN p_pctcpu
#define AIO_LOCK lck_mtx_lock(aio_lock)
#define AIO_UNLOCK lck_mtx_unlock(aio_lock)
static int aio_active_requests_for_process( struct proc *procp );
static boolean_t aio_delay_fsync_request( aio_workq_entry *entryp );
static int aio_free_request( aio_workq_entry *entryp, vm_map_t the_map );
static int aio_get_all_queues_count( void );
static int aio_get_process_count( struct proc *procp );
static aio_workq_entry * aio_get_some_work( void );
static boolean_t aio_last_group_io( aio_workq_entry *entryp );
static void aio_mark_requests( aio_workq_entry *entryp );
static int aio_queue_async_request( struct proc *procp,
user_addr_t aiocbp,
int kindOfIO );
static int aio_validate( aio_workq_entry *entryp );
static void aio_work_thread( void );
static int do_aio_cancel( struct proc *p,
int fd,
user_addr_t aiocbp,
boolean_t wait_for_completion,
boolean_t disable_notification );
static void do_aio_completion( aio_workq_entry *entryp );
static int do_aio_fsync( aio_workq_entry *entryp );
static int do_aio_read( aio_workq_entry *entryp );
static int do_aio_write( aio_workq_entry *entryp );
static void do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp );
static boolean_t is_already_queued( struct proc *procp,
user_addr_t aiocbp );
static int lio_create_async_entry( struct proc *procp,
user_addr_t aiocbp,
user_addr_t sigp,
long group_tag,
aio_workq_entry **entrypp );
static int lio_create_sync_entry( struct proc *procp,
user_addr_t aiocbp,
long group_tag,
aio_workq_entry **entrypp );
extern int dofileread( struct proc *p, struct fileproc *fp, int fd,
user_addr_t bufp, user_size_t nbyte,
off_t offset, int flags, user_ssize_t *retval );
extern int dofilewrite( struct proc *p, struct fileproc *fp, int fd,
user_addr_t bufp, user_size_t nbyte, off_t offset,
int flags, user_ssize_t *retval );
extern int aio_max_requests;
extern int aio_max_requests_per_process;
extern int aio_worker_threads;
static aio_anchor_cb aio_anchor;
static lck_mtx_t * aio_lock;
static lck_grp_t * aio_lock_grp;
static lck_attr_t * aio_lock_attr;
static lck_grp_attr_t * aio_lock_grp_attr;
static struct zone *aio_workq_zonep;
/*
 * aio_cancel - cancel one or all outstanding async I/O requests on
 * uap->fd for process p.  If uap->aiocbp is non-NULL only that request
 * is targeted.  On success returns 0 with *retval set to the AIO_*
 * disposition from do_aio_cancel(); otherwise returns EBADF or EAGAIN
 * (and *retval is -1).
 */
int
aio_cancel( struct proc *p, struct aio_cancel_args *uap, int *retval )
{
struct user_aiocb my_aiocb;
int result;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_START,
(int)p, (int)uap->aiocbp, 0, 0, 0 );
/* quick check: if nothing is outstanding anywhere, there is nothing to cancel */
AIO_LOCK;
result = aio_get_all_queues_count( );
AIO_UNLOCK;
if ( result < 1 ) {
result = EBADF;
goto ExitRoutine;
}
*retval = -1;
/*
 * Caller named a specific aiocb: copy it in (32- or 64-bit user layout)
 * and make sure its fd matches the fd the caller passed.
 */
if ( uap->aiocbp != USER_ADDR_NULL ) {
if ( !IS_64BIT_PROCESS(p) ) {
struct aiocb aiocb32;
result = copyin( uap->aiocbp, &aiocb32, sizeof(aiocb32) );
if ( result == 0 )
do_munge_aiocb( &aiocb32, &my_aiocb );
} else
result = copyin( uap->aiocbp, &my_aiocb, sizeof(my_aiocb) );
if ( result != 0 ) {
result = EAGAIN;
goto ExitRoutine;
}
if ( uap->fd != my_aiocb.aio_fildes ) {
result = EBADF;
goto ExitRoutine;
}
}
result = do_aio_cancel( p, uap->fd, uap->aiocbp, FALSE, FALSE );
/* -1 from do_aio_cancel means no matching request was found on any queue */
if ( result != -1 ) {
*retval = result;
result = 0;
goto ExitRoutine;
}
result = EBADF;
ExitRoutine:
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_END,
(int)p, (int)uap->aiocbp, result, 0, 0 );
return( result );
}
/*
 * _aio_close - internal hook invoked when fd is being closed in process p.
 * Cancels every queued request on that fd; if any request is already in
 * flight (do_aio_cancel returns AIO_NOTCANCELED) we sleep until the
 * active requests complete.  Notifications are NOT disabled here.
 */
__private_extern__ void
_aio_close( struct proc *p, int fd )
{
int error, count;
/* nothing outstanding anywhere means nothing to do for this fd */
AIO_LOCK;
count = aio_get_all_queues_count( );
AIO_UNLOCK;
if ( count < 1 )
return;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_START,
(int)p, fd, 0, 0, 0 );
error = do_aio_cancel( p, fd, 0, TRUE, FALSE );
if ( error == AIO_NOTCANCELED ) {
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close_sleep)) | DBG_FUNC_NONE,
(int)p, fd, 0, 0, 0 );
/*
 * Sleep on the per-process cleanup channel; do_aio_completion()
 * wakes it when the process has no active requests left.
 */
tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_close", 0 );
}
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_END,
(int)p, fd, 0, 0, 0 );
return;
}
/*
 * aio_error - report the current error status of the request identified
 * by uap->aiocbp.  Sets *retval to the request's error value if it has
 * completed, or EINPROGRESS if it is still active or queued; returns 0
 * in those cases and EINVAL when the aiocb is not known to us.
 * The done queue is searched first, then the process' active queue, then
 * the global async work queue (entries not yet picked up by a worker).
 */
int
aio_error( struct proc *p, struct aio_error_args *uap, int *retval )
{
aio_workq_entry *entryp;
int error;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_START,
(int)p, (int)uap->aiocbp, 0, 0, 0 );
AIO_LOCK;
if ( aio_get_all_queues_count( ) < 1 ) {
error = EINVAL;
goto ExitRoutine;
}
/* completed requests: report the saved error value */
TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
if ( entryp->uaiocbp == uap->aiocbp ) {
*retval = entryp->errorval;
error = 0;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_val)) | DBG_FUNC_NONE,
(int)p, (int)uap->aiocbp, *retval, 0, 0 );
goto ExitRoutine;
}
}
/* in-flight requests */
TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
if ( entryp->uaiocbp == uap->aiocbp ) {
*retval = EINPROGRESS;
error = 0;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_activeq)) | DBG_FUNC_NONE,
(int)p, (int)uap->aiocbp, *retval, 0, 0 );
goto ExitRoutine;
}
}
/* still waiting on the global work queue (must also match our process) */
TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
if ( p == entryp->procp && entryp->uaiocbp == uap->aiocbp ) {
*retval = EINPROGRESS;
error = 0;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_workq)) | DBG_FUNC_NONE,
(int)p, (int)uap->aiocbp, *retval, 0, 0 );
goto ExitRoutine;
}
}
error = EINVAL;
ExitRoutine:
/* NOTE: the exit trace is deliberately emitted before dropping the lock */
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_END,
(int)p, (int)uap->aiocbp, error, 0, 0 );
AIO_UNLOCK;
return( error );
}
int
aio_fsync( struct proc *p, struct aio_fsync_args *uap, int *retval )
{
int error;
int fsync_kind;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync)) | DBG_FUNC_START,
(int)p, (int)uap->aiocbp, uap->op, 0, 0 );
*retval = 0;
if (uap->op == O_SYNC || uap->op == 0)
fsync_kind = AIO_FSYNC;
#if 0 // we don't support fdatasync() call yet
else if ( uap->op == O_DSYNC )
fsync_kind = AIO_DSYNC;
#endif
else {
*retval = -1;
error = EINVAL;
goto ExitRoutine;
}
error = aio_queue_async_request( p, uap->aiocbp, fsync_kind );
if ( error != 0 )
*retval = -1;
ExitRoutine:
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync)) | DBG_FUNC_END,
(int)p, (int)uap->aiocbp, error, 0, 0 );
return( error );
}
/*
 * aio_read - queue an asynchronous read described by uap->aiocbp.
 * Returns 0 on success (*retval 0), or an errno with *retval set to -1.
 */
int
aio_read( struct proc *p, struct aio_read_args *uap, int *retval )
{
	int rc;

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_read)) | DBG_FUNC_START,
		      (int)p, (int)uap->aiocbp, 0, 0, 0 );

	*retval = 0;
	rc = aio_queue_async_request( p, uap->aiocbp, AIO_READ );
	if ( rc )
		*retval = -1;

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_read)) | DBG_FUNC_END,
		      (int)p, (int)uap->aiocbp, rc, 0, 0 );
	return( rc );
}
/*
 * aio_return - retrieve the return status of a completed request and
 * release its resources.  Only entries on the done queue yield a value;
 * active or still-queued entries return EINPROGRESS and unknown aiocbs
 * return EINVAL.  On success the entry is removed from the done queue
 * and freed here unless a completion callback still holds it
 * (AIO_COMPLETION), in which case it is marked AIO_DO_FREE and the
 * completion path frees it.
 */
int
aio_return( struct proc *p, struct aio_return_args *uap, user_ssize_t *retval )
{
aio_workq_entry *entryp;
int error;
boolean_t lock_held;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_START,
(int)p, (int)uap->aiocbp, 0, 0, 0 );
AIO_LOCK;
/* lock_held tracks whether we still own aio_lock at ExitRoutine */
lock_held = TRUE;
*retval = 0;
if ( aio_get_all_queues_count( ) < 1 ) {
error = EINVAL;
goto ExitRoutine;
}
TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
if ( entryp->uaiocbp == uap->aiocbp ) {
TAILQ_REMOVE( &p->aio_doneq, entryp, aio_workq_link );
aio_anchor.aio_done_count--;
p->aio_done_count--;
*retval = entryp->returnval;
if ( (entryp->flags & AIO_COMPLETION) == 0 ) {
vm_map_t my_map;
/* take ownership of the map reference, then free outside the lock */
my_map = entryp->aio_map;
entryp->aio_map = VM_MAP_NULL;
AIO_UNLOCK;
lock_held = FALSE;
aio_free_request( entryp, my_map );
}
else
entryp->flags |= AIO_DO_FREE;
error = 0;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_val)) | DBG_FUNC_NONE,
(int)p, (int)uap->aiocbp, *retval, 0, 0 );
goto ExitRoutine;
}
}
TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
if ( entryp->uaiocbp == uap->aiocbp ) {
error = EINPROGRESS;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_activeq)) | DBG_FUNC_NONE,
(int)p, (int)uap->aiocbp, *retval, 0, 0 );
goto ExitRoutine;
}
}
TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
if ( p == entryp->procp && entryp->uaiocbp == uap->aiocbp ) {
error = EINPROGRESS;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_workq)) | DBG_FUNC_NONE,
(int)p, (int)uap->aiocbp, *retval, 0, 0 );
goto ExitRoutine;
}
}
error = EINVAL;
ExitRoutine:
if ( lock_held )
AIO_UNLOCK;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_END,
(int)p, (int)uap->aiocbp, error, 0, 0 );
return( error );
}
/*
 * _aio_exec - internal hook invoked when process p exec()s.
 * exec must tear down all outstanding async I/O exactly the same way
 * exit does, so this simply delegates to _aio_exit().
 */
__private_extern__ void
_aio_exec( struct proc *p )
{
	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exec)) | DBG_FUNC_START,
		      (int)p, 0, 0, 0, 0 );

	_aio_exit( p );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exec)) | DBG_FUNC_END,
		      (int)p, 0, 0, 0, 0 );
}
/*
 * _aio_exit - internal hook invoked when process p exits (or execs, via
 * _aio_exec).  Cancels all of the process' requests with notifications
 * disabled, waits for any already-active requests to drain, then frees
 * every entry left on the done queue.
 */
__private_extern__ void
_aio_exit( struct proc *p )
{
int error, count;
aio_workq_entry *entryp;
/* nothing outstanding anywhere means nothing to tear down */
AIO_LOCK;
count = aio_get_all_queues_count( );
AIO_UNLOCK;
if ( count < 1 ) {
return;
}
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_START,
(int)p, 0, 0, 0, 0 );
/* fd 0 + NULL aiocbp matches every request owned by p; disable notification */
error = do_aio_cancel( p, 0, 0, TRUE, TRUE );
if ( error == AIO_NOTCANCELED ) {
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit_sleep)) | DBG_FUNC_NONE,
(int)p, 0, 0, 0, 0 );
/* woken by do_aio_completion() once no active requests remain for p */
tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_exit", 0 );
}
/* drain the done queue, freeing each entry we can */
AIO_LOCK;
entryp = TAILQ_FIRST( &p->aio_doneq );
while ( entryp != NULL ) {
aio_workq_entry *next_entryp;
next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
TAILQ_REMOVE( &p->aio_doneq, entryp, aio_workq_link );
aio_anchor.aio_done_count--;
p->aio_done_count--;
if ( (entryp->flags & AIO_COMPLETION) == 0 ) {
vm_map_t my_map;
my_map = entryp->aio_map;
entryp->aio_map = VM_MAP_NULL;
/*
 * Must drop the lock to free; the queue may change while it is
 * dropped, so restart the scan from the head afterwards.
 */
AIO_UNLOCK;
aio_free_request( entryp, my_map );
AIO_LOCK;
entryp = TAILQ_FIRST( &p->aio_doneq );
continue;
}
else
/* a completion callback still holds this entry; let it free it */
entryp->flags |= AIO_DO_FREE;
entryp = next_entryp;
}
AIO_UNLOCK;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_END,
(int)p, 0, 0, 0, 0 );
return;
}
/*
 * do_aio_cancel - cancellation workhorse.  Matching rule, applied to
 * every queue in turn: a NULL aiocbp with fd 0 matches all requests
 * owned by p; a non-NULL aiocbp matches exactly that request; a NULL
 * aiocbp with non-zero fd matches all requests on that fd.
 * Queues are scanned in order:
 *   1. global async work queue  - not yet started, cancelled outright
 *      (entry moves to the done queue and its completion runs here);
 *   2. lio sync work queue      - same, but no completion processing;
 *   3. the process' active queue - cannot be cancelled (AIO_NOTCANCELED;
 *      optionally marked AIO_WAITING / AIO_DISABLE);
 *   4. the done queue            - AIO_ALLDONE.
 * Returns the AIO_* disposition, or -1 if nothing matched.
 * Called and returns with aio_lock NOT held.
 */
static int
do_aio_cancel( struct proc *p, int fd, user_addr_t aiocbp,
boolean_t wait_for_completion, boolean_t disable_notification )
{
aio_workq_entry *entryp;
int result;
result = -1;
AIO_LOCK;
entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );
while ( entryp != NULL ) {
aio_workq_entry *next_entryp;
next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
if ( p == entryp->procp ) {
if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
(aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
(aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
/* never started - cancel it and move it straight to the done queue */
TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
aio_anchor.aio_async_workq_count--;
entryp->errorval = ECANCELED;
entryp->returnval = -1;
if ( disable_notification )
entryp->flags |= AIO_DISABLE;
result = AIO_CANCELED;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_async_workq)) | DBG_FUNC_NONE,
(int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );
TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
aio_anchor.aio_done_count++;
p->aio_done_count++;
/* AIO_COMPLETION pins the entry while we run completion unlocked */
entryp->flags |= AIO_COMPLETION;
AIO_UNLOCK;
do_aio_completion( entryp );
AIO_LOCK;
entryp->flags &= ~AIO_COMPLETION;
/* aio_return() may have asked us to free the entry meanwhile */
if ( (entryp->flags & AIO_DO_FREE) != 0 ) {
vm_map_t my_map;
my_map = entryp->aio_map;
entryp->aio_map = VM_MAP_NULL;
AIO_UNLOCK;
aio_free_request( entryp, my_map );
}
else
AIO_UNLOCK;
/* a specific aiocb was targeted - we are done */
if ( aiocbp != USER_ADDR_NULL ) {
return( result );
}
/* lock was dropped; restart the scan from the head */
AIO_LOCK;
entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );
continue;
}
}
entryp = next_entryp;
}
entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
while ( entryp != NULL ) {
aio_workq_entry *next_entryp;
next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
if ( p == entryp->procp ) {
if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
(aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
(aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
aio_anchor.lio_sync_workq_count--;
entryp->errorval = ECANCELED;
entryp->returnval = -1;
if ( disable_notification )
entryp->flags |= AIO_DISABLE;
result = AIO_CANCELED;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_sync_workq)) | DBG_FUNC_NONE,
(int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );
TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
aio_anchor.aio_done_count++;
p->aio_done_count++;
if ( aiocbp != USER_ADDR_NULL ) {
AIO_UNLOCK;
return( result );
}
}
}
entryp = next_entryp;
}
/* active requests cannot be cancelled; flag them for the completion path */
TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
(aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
(aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
result = AIO_NOTCANCELED;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_activeq)) | DBG_FUNC_NONE,
(int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );
if ( wait_for_completion )
entryp->flags |= AIO_WAITING;
if ( disable_notification )
entryp->flags |= AIO_DISABLE;
if ( aiocbp != USER_ADDR_NULL ) {
AIO_UNLOCK;
return( result );
}
}
}
/* only report ALLDONE if nothing was cancelled or found active */
if ( result == -1 ) {
TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
(aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
(aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
result = AIO_ALLDONE;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_doneq)) | DBG_FUNC_NONE,
(int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );
if ( aiocbp != USER_ADDR_NULL ) {
AIO_UNLOCK;
return( result );
}
}
}
}
AIO_UNLOCK;
return( result );
}
/*
 * aio_suspend - block until at least one of the requests named in
 * uap->aiocblist has completed, or the optional timeout expires.
 * Returns 0 (*retval 0) if a listed request is already done or we are
 * woken by a completion, EAGAIN on timeout, EINTR if interrupted, and
 * EINVAL / EAGAIN for bad arguments or copyin failures.
 */
int
aio_suspend( struct proc *p, struct aio_suspend_args *uap, int *retval )
{
int error;
int i, count;
uint64_t abstime;
struct user_timespec ts;
aio_workq_entry *entryp;
user_addr_t *aiocbpp;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_START,
(int)p, uap->nent, 0, 0, 0 );
*retval = -1;
abstime = 0;
aiocbpp = NULL;
AIO_LOCK;
count = aio_get_all_queues_count( );
AIO_UNLOCK;
if ( count < 1 ) {
error = EINVAL;
goto ExitThisRoutine;
}
if ( uap->nent < 1 || uap->nent > aio_max_requests_per_process ) {
error = EINVAL;
goto ExitThisRoutine;
}
/* copy in the optional timeout (32- or 64-bit layout) and convert it
 * to an absolute deadline; abstime stays 0 when no timeout was given */
if ( uap->timeoutp != USER_ADDR_NULL ) {
if ( proc_is64bit(p) ) {
error = copyin( uap->timeoutp, &ts, sizeof(ts) );
}
else {
struct timespec temp;
error = copyin( uap->timeoutp, &temp, sizeof(temp) );
if ( error == 0 ) {
ts.tv_sec = temp.tv_sec;
ts.tv_nsec = temp.tv_nsec;
}
}
if ( error != 0 ) {
error = EAGAIN;
goto ExitThisRoutine;
}
if ( ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000 ) {
error = EINVAL;
goto ExitThisRoutine;
}
nanoseconds_to_absolutetime( (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec,
&abstime );
clock_absolutetime_interval_to_deadline( abstime, &abstime );
}
/* buffer sized for 64-bit pointers; 32-bit lists are expanded in place below */
MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK );
if ( aiocbpp == NULL ) {
error = EAGAIN;
goto ExitThisRoutine;
}
error = copyin( uap->aiocblist, aiocbpp,
proc_is64bit(p) ? (uap->nent * sizeof(user_addr_t))
: (uap->nent * sizeof(uintptr_t)) );
if ( error != 0 ) {
error = EAGAIN;
goto ExitThisRoutine;
}
/* widen a 32-bit pointer list to user_addr_t in place; walk backwards
 * so unread narrow entries are not overwritten before they are read */
if ( !proc_is64bit(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) {
uintptr_t *my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1);
user_addr_t *my_addrp = aiocbpp + (uap->nent - 1);
for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) {
*my_addrp = (user_addr_t) (*my_ptrp);
}
}
AIO_LOCK;
/* if any listed request has already completed, return immediately */
for ( i = 0; i < uap->nent; i++ ) {
user_addr_t aiocbp;
aiocbp = *(aiocbpp + i);
if ( aiocbp == USER_ADDR_NULL )
continue;
TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
if ( entryp->uaiocbp == aiocbp ) {
*retval = 0;
error = 0;
AIO_UNLOCK;
goto ExitThisRoutine;
}
}
}
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend_sleep)) | DBG_FUNC_NONE,
(int)p, uap->nent, 0, 0, 0 );
/* assert the wait while still holding the lock so a completion (which
 * issues its wakeup under aio_lock) cannot be lost before we block;
 * NOTE(review): abstime 0 here presumably means "no deadline" - confirm */
assert_wait_deadline( (event_t) &p->AIO_SUSPEND_SLEEP_CHAN, THREAD_ABORTSAFE, abstime );
AIO_UNLOCK;
error = thread_block( THREAD_CONTINUE_NULL );
if ( error == THREAD_AWAKENED ) {
*retval = 0;
error = 0;
}
else if ( error == THREAD_TIMED_OUT ) {
error = EAGAIN;
}
else {
error = EINTR;
}
ExitThisRoutine:
if ( aiocbpp != NULL )
FREE( aiocbpp, M_TEMP );
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_END,
(int)p, uap->nent, error, 0, 0 );
return( error );
}
/*
 * aio_write - queue an asynchronous write described by uap->aiocbp.
 * Returns 0 on success (*retval 0), or an errno with *retval set to -1.
 */
int
aio_write( struct proc *p, struct aio_write_args *uap, int *retval )
{
	int rc;

	*retval = 0;
	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_write)) | DBG_FUNC_START,
		      (int)p, (int)uap->aiocbp, 0, 0, 0 );

	rc = aio_queue_async_request( p, uap->aiocbp, AIO_WRITE );
	if ( rc )
		*retval = -1;

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_write)) | DBG_FUNC_END,
		      (int)p, (int)uap->aiocbp, rc, 0, 0 );
	return( rc );
}
/*
 * lio_listio - submit a list of async I/O requests as one group.
 * LIO_NOWAIT: entries go on the global async work queue and a worker is
 * woken; the group shares a random group_tag so signal delivery can be
 * deferred to the last completion.  LIO_WAIT: entries go on the lio
 * sync work queue and are processed here, synchronously, before return.
 * call_result stays -1 while no error has been recorded; it is mapped
 * to 0 / *retval 0 on full success.
 */
int
lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval )
{
int i;
int call_result;
int result;
long group_tag;
aio_workq_entry * *entryp_listp;
user_addr_t *aiocbpp;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_START,
(int)p, uap->nent, uap->mode, 0, 0 );
entryp_listp = NULL;
aiocbpp = NULL;
call_result = -1;
*retval = -1;
if ( !(uap->mode == LIO_NOWAIT || uap->mode == LIO_WAIT) ) {
call_result = EINVAL;
goto ExitRoutine;
}
if ( uap->nent < 1 || uap->nent > AIO_LISTIO_MAX ) {
call_result = EINVAL;
goto ExitRoutine;
}
/* all entries in this submission share one group tag */
group_tag = random();
MALLOC( entryp_listp, void *, (uap->nent * sizeof(aio_workq_entry *)), M_TEMP, M_WAITOK );
if ( entryp_listp == NULL ) {
call_result = EAGAIN;
goto ExitRoutine;
}
MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK );
if ( aiocbpp == NULL ) {
call_result = EAGAIN;
goto ExitRoutine;
}
result = copyin( uap->aiocblist, aiocbpp,
IS_64BIT_PROCESS(p) ? (uap->nent * sizeof(user_addr_t))
: (uap->nent * sizeof(uintptr_t)) );
if ( result != 0 ) {
call_result = EAGAIN;
goto ExitRoutine;
}
/* widen a 32-bit pointer list in place, walking backwards so unread
 * narrow entries are not clobbered before they are read */
if ( !IS_64BIT_PROCESS(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) {
uintptr_t *my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1);
user_addr_t *my_addrp = aiocbpp + (uap->nent - 1);
for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) {
*my_addrp = (user_addr_t) (*my_ptrp);
}
}
/* create an entry per aiocb; NULL slots and failures leave a NULL entry.
 * the first failure is remembered in call_result but we keep going. */
for ( i = 0; i < uap->nent; i++ ) {
user_addr_t my_aiocbp;
*(entryp_listp + i) = NULL;
my_aiocbp = *(aiocbpp + i);
if ( my_aiocbp == USER_ADDR_NULL )
continue;
if ( uap->mode == LIO_NOWAIT )
result = lio_create_async_entry( p, my_aiocbp, uap->sigp,
group_tag, (entryp_listp + i) );
else
result = lio_create_sync_entry( p, my_aiocbp, group_tag,
(entryp_listp + i) );
if ( result != 0 && call_result == -1 )
call_result = result;
}
/* queue the successfully created entries, enforcing global and
 * per-process limits and rejecting duplicate aiocbs */
AIO_LOCK;
for ( i = 0; i < uap->nent; i++ ) {
aio_workq_entry *entryp;
entryp = *(entryp_listp + i);
if ( entryp == NULL )
continue;
if ( aio_get_all_queues_count( ) >= aio_max_requests ||
aio_get_process_count( entryp->procp ) >= aio_max_requests_per_process ||
is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) {
vm_map_t my_map;
my_map = entryp->aio_map;
entryp->aio_map = VM_MAP_NULL;
if ( call_result == -1 )
call_result = EAGAIN;
/* freeing may block; drop the lock around it */
AIO_UNLOCK;
aio_free_request( entryp, my_map );
AIO_LOCK;
continue;
}
if ( uap->mode == LIO_NOWAIT ) {
TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
aio_anchor.aio_async_workq_count++;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE,
(int)p, (int)entryp->uaiocbp, 0, 0, 0 );
}
else {
TAILQ_INSERT_TAIL( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
aio_anchor.lio_sync_workq_count++;
}
}
if ( uap->mode == LIO_NOWAIT ) {
/* hand the new work to a waiting worker thread */
wakeup_one( (caddr_t) &aio_anchor.aio_async_workq );
}
else {
/* LIO_WAIT: process our group's entries right here, synchronously */
aio_workq_entry *entryp;
int error;
entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
while ( entryp != NULL ) {
if ( p == entryp->procp && group_tag == entryp->group_tag ) {
TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
aio_anchor.lio_sync_workq_count--;
AIO_UNLOCK;
if ( (entryp->flags & AIO_READ) != 0 ) {
error = do_aio_read( entryp );
}
else if ( (entryp->flags & AIO_WRITE) != 0 ) {
error = do_aio_write( entryp );
}
else if ( (entryp->flags & AIO_FSYNC) != 0 ) {
error = do_aio_fsync( entryp );
}
else {
printf( "%s - unknown aio request - flags 0x%02X \n",
__FUNCTION__, entryp->flags );
error = EINVAL;
}
entryp->errorval = error;
if ( error != 0 && call_result == -1 )
call_result = EIO;
AIO_LOCK;
TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
aio_anchor.aio_done_count++;
p->aio_done_count++;
/* lock was dropped while doing the I/O - restart from the head */
entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
continue;
}
entryp = TAILQ_NEXT( entryp, aio_workq_link );
}
}
AIO_UNLOCK;
/* no error recorded anywhere - report success */
if ( call_result == -1 ) {
call_result = 0;
*retval = 0;
}
ExitRoutine:
if ( entryp_listp != NULL )
FREE( entryp_listp, M_TEMP );
if ( aiocbpp != NULL )
FREE( aiocbpp, M_TEMP );
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_END,
(int)p, call_result, 0, 0, 0 );
return( call_result );
}
/*
 * aio_work_thread - body of each async I/O worker thread; loops forever.
 * Sleeps on the global async work queue when idle; otherwise pulls one
 * entry (via aio_get_some_work, which also moves it to the owner's
 * active queue), performs the I/O in the context of the requesting
 * process' address space, then moves the entry to the done queue and
 * runs completion processing.
 */
static void
aio_work_thread( void )
{
aio_workq_entry *entryp;
for( ;; ) {
AIO_LOCK;
entryp = aio_get_some_work();
if ( entryp == NULL ) {
/* no work: block until aio_queue_async_request / lio_listio wakes us */
assert_wait( (event_t) &aio_anchor.aio_async_workq, THREAD_UNINT );
AIO_UNLOCK;
thread_block( (thread_continue_t)aio_work_thread );
}
else {
int error;
vm_map_t currentmap;
vm_map_t oldmap = VM_MAP_NULL;
task_t oldaiotask = TASK_NULL;
struct uthread *uthreadp = NULL;
AIO_UNLOCK;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_START,
(int)entryp->procp, (int)entryp->uaiocbp, entryp->flags, 0, 0 );
/*
 * If the request belongs to another address space, temporarily
 * switch this thread onto the requester's vm_map (and note its
 * task) so copyin/copyout hit the right user memory.
 */
currentmap = get_task_map( (current_proc())->task );
if ( currentmap != entryp->aio_map ) {
uthreadp = (struct uthread *) get_bsdthread_info(current_thread());
oldaiotask = uthreadp->uu_aio_task;
uthreadp->uu_aio_task = entryp->procp->task;
oldmap = vm_map_switch( entryp->aio_map );
}
if ( (entryp->flags & AIO_READ) != 0 ) {
error = do_aio_read( entryp );
}
else if ( (entryp->flags & AIO_WRITE) != 0 ) {
error = do_aio_write( entryp );
}
else if ( (entryp->flags & AIO_FSYNC) != 0 ) {
error = do_aio_fsync( entryp );
}
else {
printf( "%s - unknown aio request - flags 0x%02X \n",
__FUNCTION__, entryp->flags );
error = EINVAL;
}
entryp->errorval = error;
/* restore our own map/task before touching the queues again */
if ( currentmap != entryp->aio_map ) {
(void) vm_map_switch( oldmap );
uthreadp->uu_aio_task = oldaiotask;
}
AIO_LOCK;
TAILQ_REMOVE( &entryp->procp->aio_activeq, entryp, aio_workq_link );
aio_anchor.aio_active_count--;
entryp->procp->aio_active_count--;
TAILQ_INSERT_TAIL( &entryp->procp->aio_doneq, entryp, aio_workq_link );
aio_anchor.aio_done_count++;
entryp->procp->aio_done_count++;
/* AIO_COMPLETION pins the entry while completion runs unlocked */
entryp->flags |= AIO_COMPLETION;
/* drop our reference on the requester's map outside the lock */
if ( VM_MAP_NULL != entryp->aio_map ) {
vm_map_t my_map;
my_map = entryp->aio_map;
entryp->aio_map = VM_MAP_NULL;
AIO_UNLOCK;
vm_map_deallocate( my_map );
}
else {
AIO_UNLOCK;
}
do_aio_completion( entryp );
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_END,
(int)entryp->procp, (int)entryp->uaiocbp, entryp->errorval,
entryp->returnval, 0 );
AIO_LOCK;
entryp->flags &= ~AIO_COMPLETION;
/* aio_return/_aio_exit may have asked us to free the entry meanwhile */
if ( (entryp->flags & AIO_DO_FREE) != 0 ) {
vm_map_t my_map;
my_map = entryp->aio_map;
entryp->aio_map = VM_MAP_NULL;
AIO_UNLOCK;
aio_free_request( entryp, my_map );
}
else
AIO_UNLOCK;
}
}
}
/*
 * aio_get_some_work - pick the next entry off the global async work
 * queue and move it onto the owning process' active queue, updating the
 * counters.  fsync entries whose earlier I/O on the same fd is still in
 * flight are skipped (they must not run ahead of that I/O).  Returns
 * NULL when no runnable work exists.  Caller must hold aio_lock.
 */
static aio_workq_entry *
aio_get_some_work( void )
{
aio_workq_entry *entryp;
for ( entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );
entryp != NULL;
entryp = TAILQ_NEXT( entryp, aio_workq_link ) ) {
if ( (entryp->flags & AIO_FSYNC) != 0 ) {
/* fsync must wait for prior I/O on its fd to finish first */
if ( aio_delay_fsync_request( entryp ) ) {
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync_delay)) | DBG_FUNC_NONE,
(int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );
continue;
}
}
break;
}
if ( entryp != NULL ) {
TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
aio_anchor.aio_async_workq_count--;
TAILQ_INSERT_TAIL( &entryp->procp->aio_activeq, entryp, aio_workq_link );
aio_anchor.aio_active_count++;
entryp->procp->aio_active_count++;
}
return( entryp );
}
/*
 * aio_delay_fsync_request - decide whether the given fsync entry must
 * wait.  Returns TRUE when some active request on the same fd was marked
 * (via fsyncp) as having to complete before this fsync may run.
 */
static boolean_t
aio_delay_fsync_request( aio_workq_entry *entryp )
{
	aio_workq_entry *active;

	TAILQ_FOREACH( active, &entryp->procp->aio_activeq, aio_workq_link ) {
		if ( active->fsyncp == USER_ADDR_NULL )
			continue;
		if ( active->fsyncp == entryp->uaiocbp &&
		     active->aiocb.aio_fildes == entryp->aiocb.aio_fildes )
			return( TRUE );
	}
	return( FALSE );
}
/*
 * aio_queue_async_request - common path for aio_read / aio_write /
 * aio_fsync.  Allocates and fills a work queue entry, validates it,
 * takes a reference on the caller's vm_map, enforces duplicate and
 * limit checks, queues it on the global async work queue, and wakes a
 * worker thread.  Returns 0 on success or an errno; on failure the
 * entry (and any map reference it holds) is released here.
 */
static int
aio_queue_async_request( struct proc *procp, user_addr_t aiocbp, int kindOfIO )
{
aio_workq_entry *entryp;
int result;
entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
if ( entryp == NULL ) {
result = EAGAIN;
goto error_exit;
}
bzero( entryp, sizeof(*entryp) );
entryp->procp = procp;
entryp->uaiocbp = aiocbp;
entryp->flags |= kindOfIO;
entryp->aio_map = VM_MAP_NULL;
/* copy in the user aiocb using the 32- or 64-bit layout as appropriate */
if ( !IS_64BIT_PROCESS(procp) ) {
struct aiocb aiocb32;
result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
if ( result == 0 )
do_munge_aiocb( &aiocb32, &entryp->aiocb );
} else
result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );
if ( result != 0 ) {
result = EAGAIN;
goto error_exit;
}
result = aio_validate( entryp );
if ( result != 0 )
goto error_exit;
/* hold the requester's address space so workers can access its buffers;
 * this reference is dropped by aio_free_request / the worker thread */
entryp->aio_map = get_task_map( procp->task );
vm_map_reference( entryp->aio_map );
AIO_LOCK;
if ( is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) {
AIO_UNLOCK;
result = EAGAIN;
goto error_exit;
}
/* enforce system-wide and per-process request limits */
if ( aio_get_all_queues_count( ) >= aio_max_requests ||
aio_get_process_count( procp ) >= aio_max_requests_per_process ) {
AIO_UNLOCK;
result = EAGAIN;
goto error_exit;
}
/* an fsync must run after all currently outstanding I/O on its fd */
if ( (kindOfIO & AIO_FSYNC) != 0 )
aio_mark_requests( entryp );
TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
aio_anchor.aio_async_workq_count++;
wakeup_one( (caddr_t) &aio_anchor.aio_async_workq );
AIO_UNLOCK;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE,
(int)procp, (int)aiocbp, 0, 0, 0 );
return( 0 );
error_exit:
/* aio_free_request also drops the map reference if one was taken */
if ( entryp != NULL ) {
aio_free_request( entryp, entryp->aio_map );
}
return( result );
}
/*
 * lio_create_async_entry - build a work queue entry for one aiocb of a
 * lio_listio(LIO_NOWAIT) submission.  Copies in the aiocb and optional
 * sigevent, validates, takes a reference on the caller's vm_map, and
 * hands the entry back via *entrypp.  A LIO_NOP opcode is a success
 * that produces no entry (*entrypp untouched, entry freed).  On error
 * the entry is freed here (no map reference exists yet on those paths).
 */
static int
lio_create_async_entry( struct proc *procp, user_addr_t aiocbp,
user_addr_t sigp, long group_tag,
aio_workq_entry **entrypp )
{
aio_workq_entry *entryp;
int result;
entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
if ( entryp == NULL ) {
result = EAGAIN;
goto error_exit;
}
bzero( entryp, sizeof(*entryp) );
entryp->procp = procp;
entryp->uaiocbp = aiocbp;
entryp->flags |= AIO_LIO;
entryp->group_tag = group_tag;
entryp->aio_map = VM_MAP_NULL;
/* copy in the user aiocb using the 32- or 64-bit layout as appropriate */
if ( !IS_64BIT_PROCESS(procp) ) {
struct aiocb aiocb32;
result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
if ( result == 0 )
do_munge_aiocb( &aiocb32, &entryp->aiocb );
} else
result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );
if ( result != 0 ) {
result = EAGAIN;
goto error_exit;
}
/* LIO_NOP: nothing to queue, report success with no entry */
if ( entryp->aiocb.aio_lio_opcode == LIO_NOP ) {
result = 0;
goto error_exit;
}
/* the list-wide sigevent overrides whatever was in the aiocb */
if ( sigp != USER_ADDR_NULL ) {
if ( !IS_64BIT_PROCESS(procp) ) {
struct sigevent sigevent32;
result = copyin( sigp, &sigevent32, sizeof(sigevent32) );
if ( result == 0 ) {
/* widen each 32-bit sigevent field into the user_aiocb form */
entryp->aiocb.aio_sigevent.sigev_notify = sigevent32.sigev_notify;
entryp->aiocb.aio_sigevent.sigev_signo = sigevent32.sigev_signo;
entryp->aiocb.aio_sigevent.sigev_value.size_equivalent.sival_int =
sigevent32.sigev_value.sival_int;
entryp->aiocb.aio_sigevent.sigev_notify_function =
CAST_USER_ADDR_T(sigevent32.sigev_notify_function);
entryp->aiocb.aio_sigevent.sigev_notify_attributes =
CAST_USER_ADDR_T(sigevent32.sigev_notify_attributes);
}
} else
result = copyin( sigp, &entryp->aiocb.aio_sigevent, sizeof(entryp->aiocb.aio_sigevent) );
if ( result != 0 ) {
result = EAGAIN;
goto error_exit;
}
}
result = aio_validate( entryp );
if ( result != 0 )
goto error_exit;
/* hold the requester's address space; released when the entry is freed */
entryp->aio_map = get_task_map( procp->task );
vm_map_reference( entryp->aio_map );
*entrypp = entryp;
return( 0 );
error_exit:
if ( entryp != NULL )
zfree( aio_workq_zonep, entryp );
return( result );
}
/*
 * aio_mark_requests - record, on every outstanding request that touches
 * the same fd as the given fsync entry, that it must complete before the
 * fsync may run (see aio_delay_fsync_request).  Marks both the owning
 * process' active queue and matching entries on the global work queue.
 */
static void
aio_mark_requests( aio_workq_entry *entryp )
{
	aio_workq_entry *scan;
	int fd = entryp->aiocb.aio_fildes;

	TAILQ_FOREACH( scan, &entryp->procp->aio_activeq, aio_workq_link ) {
		if ( scan->aiocb.aio_fildes == fd )
			scan->fsyncp = entryp->uaiocbp;
	}

	TAILQ_FOREACH( scan, &aio_anchor.aio_async_workq, aio_workq_link ) {
		if ( scan->procp == entryp->procp &&
		     scan->aiocb.aio_fildes == fd )
			scan->fsyncp = entryp->uaiocbp;
	}
}
/*
 * lio_create_sync_entry - build a work queue entry for one aiocb of a
 * lio_listio(LIO_WAIT) submission.  Like lio_create_async_entry but
 * with no sigevent handling and no vm_map reference (the I/O runs
 * synchronously in the caller's own context).  LIO_NOP is a success
 * that produces no entry; on error the entry is freed here.
 */
static int
lio_create_sync_entry( struct proc *procp, user_addr_t aiocbp,
long group_tag, aio_workq_entry **entrypp )
{
aio_workq_entry *entryp;
int result;
entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
if ( entryp == NULL ) {
result = EAGAIN;
goto error_exit;
}
bzero( entryp, sizeof(*entryp) );
entryp->procp = procp;
entryp->uaiocbp = aiocbp;
entryp->flags |= AIO_LIO;
entryp->group_tag = group_tag;
entryp->aio_map = VM_MAP_NULL;
/* copy in the user aiocb using the 32- or 64-bit layout as appropriate */
if ( !IS_64BIT_PROCESS(procp) ) {
struct aiocb aiocb32;
result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
if ( result == 0 )
do_munge_aiocb( &aiocb32, &entryp->aiocb );
} else
result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );
if ( result != 0 ) {
result = EAGAIN;
goto error_exit;
}
/* LIO_NOP: nothing to queue, report success with no entry */
if ( entryp->aiocb.aio_lio_opcode == LIO_NOP ) {
result = 0;
goto error_exit;
}
result = aio_validate( entryp );
if ( result != 0 ) {
goto error_exit;
}
*entrypp = entryp;
return( 0 );
error_exit:
if ( entryp != NULL )
zfree( aio_workq_zonep, entryp );
return( result );
}
/*
 * aio_free_request - release a work queue entry and, when the_map is
 * not VM_MAP_NULL, drop the vm_map reference that was taken on the
 * requester's address space when the entry was created.  Always
 * returns 0.  Must be called without aio_lock held (may block).
 */
static int
aio_free_request( aio_workq_entry *entryp, vm_map_t the_map )
{
	if ( the_map != VM_MAP_NULL )
		vm_map_deallocate( the_map );

	zfree( aio_workq_zonep, entryp );
	return( 0 );
}
/*
 * aio_validate - sanity check a newly built work queue entry.  Maps a
 * lio opcode onto the AIO_READ / AIO_WRITE flag, validates buffer,
 * length and offset for data transfers, validates the sigevent, and
 * verifies the fd is open with the required access mode and refers to a
 * vnode.  Returns 0 or an errno (EINVAL / EBADF / ESPIPE).
 */
static int
aio_validate( aio_workq_entry *entryp )
{
struct fileproc *fp;
int flag;
int result;
result = 0;
/* lio entries carry their direction in the opcode, not the flags */
if ( (entryp->flags & AIO_LIO) != 0 ) {
if ( entryp->aiocb.aio_lio_opcode == LIO_READ )
entryp->flags |= AIO_READ;
else if ( entryp->aiocb.aio_lio_opcode == LIO_WRITE )
entryp->flags |= AIO_WRITE;
else if ( entryp->aiocb.aio_lio_opcode == LIO_NOP )
return( 0 );
else
return( EINVAL );
}
/* writes and fsyncs need the fd open for writing; reads need FREAD */
flag = FREAD;
if ( (entryp->flags & (AIO_WRITE | AIO_FSYNC)) != 0 ) {
flag = FWRITE;
}
if ( (entryp->flags & (AIO_READ | AIO_WRITE)) != 0 ) {
if ( entryp->aiocb.aio_nbytes > INT_MAX ||
entryp->aiocb.aio_buf == USER_ADDR_NULL ||
entryp->aiocb.aio_offset < 0 )
return( EINVAL );
}
/* only SIGEV_SIGNAL (with a catchable, valid signal) or SIGEV_NONE */
if ( entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ) {
int signum;
signum = entryp->aiocb.aio_sigevent.sigev_signo;
if ( signum <= 0 || signum >= NSIG ||
signum == SIGKILL || signum == SIGSTOP )
return (EINVAL);
}
else if ( entryp->aiocb.aio_sigevent.sigev_notify != SIGEV_NONE )
return (EINVAL);
/* check the fd under the fd lock: open mode, vnode type, mark AIO issued */
proc_fdlock(entryp->procp);
result = fp_lookup( entryp->procp, entryp->aiocb.aio_fildes, &fp , 1);
if ( result == 0 ) {
if ( (fp->f_fglob->fg_flag & flag) == 0 ) {
result = EBADF;
}
else if ( fp->f_fglob->fg_type != DTYPE_VNODE ) {
result = ESPIPE;
} else
fp->f_flags |= FP_AIOISSUED;
fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp , 1);
}
else {
result = EBADF;
}
proc_fdunlock(entryp->procp);
return( result );
}
/*
 * aio_get_process_count - total outstanding async I/O requests owned by
 * procp: done + active (per-process counters) plus any still sitting on
 * the two global work queues.  Caller must hold aio_lock.
 */
static int
aio_get_process_count( struct proc *procp )
{
	aio_workq_entry *scan;
	int total;

	total = procp->aio_done_count + procp->aio_active_count;

	TAILQ_FOREACH( scan, &aio_anchor.aio_async_workq, aio_workq_link ) {
		if ( scan->procp == procp )
			total++;
	}
	TAILQ_FOREACH( scan, &aio_anchor.lio_sync_workq, aio_workq_link ) {
		if ( scan->procp == procp )
			total++;
	}
	return( total );
}
/*
 * aio_get_all_queues_count - system-wide total of AIO requests
 * currently tracked: queued (async and lio-sync work queues),
 * active, and completed-but-unreaped.
 */
static int
aio_get_all_queues_count( void )
{
	return( aio_anchor.aio_async_workq_count +
		aio_anchor.lio_sync_workq_count +
		aio_anchor.aio_active_count +
		aio_anchor.aio_done_count );
}
/*
 * do_aio_completion - post-processing when an AIO request finishes.
 * Depending on how the request was issued this either delivers the
 * requested completion signal, wakes a thread waiting in the cleanup
 * path for all active requests to drain, or wakes any thread blocked
 * in aio_suspend().
 */
static void
do_aio_completion( aio_workq_entry *entryp )
{
/* deliver the completion signal unless signaling has been disabled
 * for this entry (AIO_DISABLE).  For a lio_listio group
 * (group_tag != 0) only signal when the last request of the group
 * completes. */
if ( entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL &&
(entryp->flags & AIO_DISABLE) == 0 ) {
if ( entryp->group_tag == 0 ||
(entryp->group_tag != 0 && aio_last_group_io( entryp )) ) {
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_sig)) | DBG_FUNC_NONE,
(int)entryp->procp, (int)entryp->uaiocbp,
entryp->aiocb.aio_sigevent.sigev_signo, 0, 0 );
psignal( entryp->procp, entryp->aiocb.aio_sigevent.sigev_signo );
return;
}
}
/* a thread (exit/exec/close cleanup) is waiting for this process'
 * active requests to drain; wake it when the last one finishes */
if ( (entryp->flags & AIO_WAITING) != 0 ) {
int active_requests;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wait)) | DBG_FUNC_NONE,
(int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );
AIO_LOCK;
active_requests = aio_active_requests_for_process( entryp->procp );
if ( active_requests < 1 ) {
/* no active requests left - wake the cleanup sleeper */
wakeup_one( (caddr_t) &entryp->procp->AIO_CLEANUP_SLEEP_CHAN );
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wake)) | DBG_FUNC_NONE,
(int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );
}
AIO_UNLOCK;
return;
}
/* otherwise wake any thread blocked in aio_suspend() for this process */
AIO_LOCK;
wakeup_one( (caddr_t) &entryp->procp->AIO_SUSPEND_SLEEP_CHAN );
AIO_UNLOCK;
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_suspend_wake)) | DBG_FUNC_NONE,
(int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );
return;
}
/*
 * aio_last_group_io - TRUE when the given entry is the last request
 * of its lio_listio group still known to the subsystem.  Another
 * entry with the same group tag on the process' active queue or on
 * either global work queue means more group I/O is outstanding.
 */
static boolean_t
aio_last_group_io( aio_workq_entry *entryp )
{
	aio_workq_entry *scanp;

	TAILQ_FOREACH( scanp, &entryp->procp->aio_activeq, aio_workq_link ) {
		if ( scanp->group_tag == entryp->group_tag )
			return( FALSE );
	}

	TAILQ_FOREACH( scanp, &aio_anchor.aio_async_workq, aio_workq_link ) {
		if ( scanp->group_tag == entryp->group_tag )
			return( FALSE );
	}

	TAILQ_FOREACH( scanp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
		if ( scanp->group_tag == entryp->group_tag )
			return( FALSE );
	}

	return( TRUE );
}
/*
 * do_aio_read - perform the read for an aio_read request: a
 * positioned read (FOF_OFFSET) of aio_nbytes into aio_buf at
 * aio_offset, with the byte count stored in entryp->returnval.
 * Returns 0 on success or an errno value.
 *
 * Fix: the original dereferenced fp (fg_flag check) and only then
 * tested fp != NULL; the NULL branch was dead code (fp_lookup()
 * returning 0 supplies a valid fp) and would have passed a NULL fp
 * to fp_drop().  The redundant check has been removed.
 */
static int
do_aio_read( aio_workq_entry *entryp )
{
	struct fileproc *fp;
	int error;

	/* take a reference on the file for the duration of the I/O */
	if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp , 0)) )
		return(error);

	/* descriptor must be open for reading */
	if ( (fp->f_fglob->fg_flag & FREAD) == 0 ) {
		fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
		return(EBADF);
	}

	error = dofileread( entryp->procp, fp, entryp->aiocb.aio_fildes,
			    entryp->aiocb.aio_buf,
			    entryp->aiocb.aio_nbytes,
			    entryp->aiocb.aio_offset, FOF_OFFSET,
			    &entryp->returnval );
	fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

	return( error );
}
/*
 * do_aio_write - perform the write for an aio_write request: a
 * positioned write (FOF_OFFSET) of aio_nbytes from aio_buf at
 * aio_offset, using the credential captured at open time (FOF_PCRED),
 * with the byte count stored in entryp->returnval.  Returns 0 on
 * success or an errno value.
 *
 * Fix: the original dereferenced fp (fg_flag check) and only then
 * tested fp != NULL; the NULL branch was dead code (fp_lookup()
 * returning 0 supplies a valid fp) and would have passed a NULL fp
 * to fp_drop().  The redundant check has been removed.
 */
static int
do_aio_write( aio_workq_entry *entryp )
{
	struct fileproc *fp;
	int error;

	/* take a reference on the file for the duration of the I/O */
	if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp , 0)) )
		return(error);

	/* descriptor must be open for writing */
	if ( (fp->f_fglob->fg_flag & FWRITE) == 0 ) {
		fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
		return(EBADF);
	}

	error = dofilewrite( entryp->procp,
			     fp,
			     entryp->aiocb.aio_fildes,
			     entryp->aiocb.aio_buf,
			     entryp->aiocb.aio_nbytes,
			     entryp->aiocb.aio_offset,
			     FOF_OFFSET | FOF_PCRED,
			     &entryp->returnval);
	fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

	return( error );
}
/*
 * aio_active_requests_for_process - number of this process' AIO
 * requests currently in flight (per-process active count); queued
 * and completed requests are not included.
 */
static int
aio_active_requests_for_process( struct proc *procp )
{
return( procp->aio_active_count );
}
/*
 * do_aio_fsync - carry out an aio_fsync request by synchronously
 * flushing the target vnode via VNOP_FSYNC with MNT_WAIT.  On any
 * failure entryp->returnval is set to -1 (the value reported by
 * aio_return()) and an errno value is returned; on success 0.
 */
static int
do_aio_fsync( aio_workq_entry *entryp )
{
struct vfs_context context;
struct vnode *vp;
struct fileproc *fp;
int error;
/* resolve the descriptor to its fileproc and vnode (takes an fp reference) */
error = fp_getfvp( entryp->procp, entryp->aiocb.aio_fildes, &fp, &vp);
if ( error == 0 ) {
/* take an iocount on the vnode; drop the fp reference on failure */
if ( (error = vnode_getwithref(vp)) ) {
fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
entryp->returnval = -1;
return(error);
}
/* flush under the credential captured in the file glob */
context.vc_proc = entryp->procp;
context.vc_ucred = fp->f_fglob->fg_cred;
error = VNOP_FSYNC( vp, MNT_WAIT, &context);
(void)vnode_put(vp);
fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
}
if ( error != 0 )
entryp->returnval = -1;
return( error );
}
/*
 * is_already_queued - TRUE when the user-space aiocb address is
 * already known to the AIO subsystem for this process, i.e. it
 * appears on the process' done or active queue or (matched by
 * process) on either global work queue.
 */
static boolean_t
is_already_queued( struct proc *procp,
		   user_addr_t aiocbp )
{
	aio_workq_entry *scanp;

	/* completed requests awaiting aio_return() */
	TAILQ_FOREACH( scanp, &procp->aio_doneq, aio_workq_link ) {
		if ( scanp->uaiocbp == aiocbp )
			return( TRUE );
	}

	/* requests currently being processed */
	TAILQ_FOREACH( scanp, &procp->aio_activeq, aio_workq_link ) {
		if ( scanp->uaiocbp == aiocbp )
			return( TRUE );
	}

	/* global queues hold all processes' requests - match process too */
	TAILQ_FOREACH( scanp, &aio_anchor.aio_async_workq, aio_workq_link ) {
		if ( scanp->procp == procp && scanp->uaiocbp == aiocbp )
			return( TRUE );
	}

	TAILQ_FOREACH( scanp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
		if ( scanp->procp == procp && scanp->uaiocbp == aiocbp )
			return( TRUE );
	}

	return( FALSE );
}
/*
 * aio_init - one-time startup initialization of the AIO subsystem:
 * allocates the global AIO mutex, initializes the global work queues
 * and counters, creates the zone used for aio_workq_entry
 * allocations (sized for up to aio_max_requests entries), and spawns
 * the worker thread pool.
 */
__private_extern__ void
aio_init( void )
{
int i;
/* the mutex must exist before AIO_LOCK below can be taken */
aio_lock_grp_attr = lck_grp_attr_alloc_init();
lck_grp_attr_setstat(aio_lock_grp_attr);
aio_lock_grp = lck_grp_alloc_init("aio", aio_lock_grp_attr);
aio_lock_attr = lck_attr_alloc_init();
aio_lock = lck_mtx_alloc_init(aio_lock_grp, aio_lock_attr);
AIO_LOCK;
TAILQ_INIT( &aio_anchor.aio_async_workq );
TAILQ_INIT( &aio_anchor.lio_sync_workq );
aio_anchor.aio_async_workq_count = 0;
aio_anchor.lio_sync_workq_count = 0;
aio_anchor.aio_active_count = 0;
aio_anchor.aio_done_count = 0;
AIO_UNLOCK;
/* zone max/alloc sized to hold aio_max_requests entries */
i = sizeof( aio_workq_entry );
aio_workq_zonep = zinit( i, i * aio_max_requests, i * aio_max_requests, "aiowq" );
_aio_create_worker_threads( aio_worker_threads );
return;
}
/*
 * _aio_create_worker_threads - spawn the pool of kernel threads that
 * service the AIO work queues.  Creation failures are logged but do
 * not abort the remaining threads.
 */
__private_extern__ void
_aio_create_worker_threads( int num )
{
	int created;

	for ( created = 0; created < num; created++ ) {
		thread_t new_thread;

		new_thread = kernel_thread( kernel_task, aio_work_thread );
		if ( new_thread == THREAD_NULL )
			printf( "%s - failed to create a work thread \n", __FUNCTION__ );
	}

	return;
}
/*
 * get_aiotask - return the task on whose behalf the current thread's
 * AIO work should be done, as recorded in the current uthread's
 * uu_aio_task field.
 */
task_t
get_aiotask(void)
{
return ((struct uthread *)get_bsdthread_info(current_thread()))->uu_aio_task;
}
/*
 * do_munge_aiocb - convert a 32-bit user-space aiocb into the
 * kernel's user_aiocb layout, widening pointer-sized fields with
 * CAST_USER_ADDR_T.  Only the int form of the sigevent value
 * (sival_int) is carried across; presumably pointer-valued
 * sigev_value is not needed by this path -- TODO confirm against
 * callers.
 */
static void
do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp )
{
the_user_aiocbp->aio_fildes = my_aiocbp->aio_fildes;
the_user_aiocbp->aio_offset = my_aiocbp->aio_offset;
the_user_aiocbp->aio_buf = CAST_USER_ADDR_T(my_aiocbp->aio_buf);
the_user_aiocbp->aio_nbytes = my_aiocbp->aio_nbytes;
the_user_aiocbp->aio_reqprio = my_aiocbp->aio_reqprio;
the_user_aiocbp->aio_lio_opcode = my_aiocbp->aio_lio_opcode;
/* sigevent: copy notify mode, signal number, and the int-sized value */
the_user_aiocbp->aio_sigevent.sigev_notify = my_aiocbp->aio_sigevent.sigev_notify;
the_user_aiocbp->aio_sigevent.sigev_signo = my_aiocbp->aio_sigevent.sigev_signo;
the_user_aiocbp->aio_sigevent.sigev_value.size_equivalent.sival_int =
my_aiocbp->aio_sigevent.sigev_value.sival_int;
/* notify function/attributes pointers widened to user_addr_t */
the_user_aiocbp->aio_sigevent.sigev_notify_function =
CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_function);
the_user_aiocbp->aio_sigevent.sigev_notify_attributes =
CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_attributes);
}