#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file_internal.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/vnode_internal.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/sysctl.h>
#include <sys/ubc.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/kpi_mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/syslog.h>
#include <sys/user.h>
#include <sys/sysproto.h>
#include <sys/kpi_socket.h>
#include <sys/fsevents.h>
#include <libkern/OSAtomic.h>
#include <kern/thread_call.h>
#include <kern/task.h>
#include <bsm/audit_kernel.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <nfs/xdr_subs.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfsrvcache.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_lock.h>
#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
kern_return_t thread_terminate(thread_t);
#if NFSSERVER
extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
struct nfsrv_sock *slp,
vfs_context_t ctx,
mbuf_t *mrepp);
extern int nfsrv_wg_delay;
extern int nfsrv_wg_delay_v3;
static int nfsrv_require_resv_port = 0;
static int nfsrv_deadsock_timer_on = 0;
static int nfssvc_addsock(socket_t, mbuf_t);
static int nfssvc_nfsd(void);
static int nfssvc_export(user_addr_t);
static void nfsrv_zapsock(struct nfsrv_sock *slp);
static void nfsrv_slpderef(struct nfsrv_sock *);
static void nfsrv_slpfree(struct nfsrv_sock *);
#endif
/* sysctl hierarchy root for NFS tunables: vfs.generic.nfs */
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");
#if NFSCLIENT
/* vfs.generic.nfs.client.*: NFS client tunables (RW unless noted) */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW, &nfs_iosize, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfs_access_cache_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW, &nfs_allow_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW, &nfs_statfs_rate_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW, &nfsiod_thread_max, 0, "");
/* read-only counters */
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD, &nfsiod_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD, &nfs_lockd_mounts, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW, &nfs_max_async_writes, 0, "");
#endif
#if NFSSERVER
/* vfs.generic.nfs.server.*: NFS server tunables (RW unless noted) */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW, &nfsrv_fsevents_enabled, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW, &nfsd_thread_max, 0, "");
/* read-only counter */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD, &nfsd_thread_count, 0, "");
#endif
#if NFSCLIENT
/*
 * nfsclnt system call: client-side NFS control.
 * The only supported command is NFSCLNT_LOCKDANS, which copies a
 * struct lockd_ans in from user space and delivers it to nfslockdans().
 * Returns EINVAL for any other flag value.
 */
int
nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
{
	struct lockd_ans ans;
	int err;

	if (uap->flag != NFSCLNT_LOCKDANS)
		return EINVAL;

	err = copyin(uap->argp, &ans, sizeof(ans));
	if (err != 0)
		return (err);
	return (nfslockdans(p, &ans));
}
static int nfsiod_continue(int);
/*
 * Terminate an nfsiod thread.
 * Precondition: caller holds nfsiod_mutex (every call site in this file
 * takes it first); this function drops the count, releases the mutex,
 * frees the niod structure (if any), and terminates the calling thread.
 * Does not return.
 */
static void
nfsiod_terminate(struct nfsiod *niod)
{
nfsiod_thread_count--;
lck_mtx_unlock(nfsiod_mutex);
if (niod)
FREE(niod, M_TEMP);
else
/* should not happen: thread had no niod structure to free */
printf("nfsiod: terminating without niod\n");
thread_terminate(current_thread());
/* NOTREACHED */
}
/*
 * nfsiod thread startup.
 * Allocates the per-thread struct nfsiod, puts it on the free list,
 * wakes the spawner (nfsiod_start() sleeps on the new thread pointer),
 * then sleeps waiting for work.  msleep0() is given nfsiod_continue as
 * a continuation, so subsequent wakeups re-enter there rather than here.
 */
static void
nfsiod_thread(void)
{
struct nfsiod *niod;
int error;
MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
if (!niod) {
/* allocation failed: undo the count taken in nfsiod_start() and exit */
lck_mtx_lock(nfsiod_mutex);
nfsiod_thread_count--;
lck_mtx_unlock(nfsiod_mutex);
thread_terminate(current_thread());
/* NOTREACHED */
}
bzero(niod, sizeof(*niod));
lck_mtx_lock(nfsiod_mutex);
TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
/* let nfsiod_start() know we're up (it sleeps on the thread pointer) */
wakeup(current_thread());
/* PDROP releases nfsiod_mutex; idle timeout bounds how long we linger */
error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
/* If msleep0() returns here (rather than via the continuation),
 * pull ourselves off the free list and terminate. */
lck_mtx_lock(nfsiod_mutex);
if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
nfsiod_terminate(niod);
}
/*
 * Start a new nfsiod (async I/O helper) thread.
 * Returns EBUSY once the pool has reached NFSIOD_MAX.  Otherwise bumps
 * the thread count, spawns the thread, and sleeps (PDROP releases
 * nfsiod_mutex) until the new thread wakes us from nfsiod_thread().
 */
int
nfsiod_start(void)
{
thread_t thd;
lck_mtx_lock(nfsiod_mutex);
/* NOTE(review): the (nfsiod_thread_count > 0) clause is redundant
 * whenever NFSIOD_MAX > 0 — confirm whether NFSIOD_MAX can be zero. */
if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
lck_mtx_unlock(nfsiod_mutex);
return (EBUSY);
}
nfsiod_thread_count++;
thd = kernel_thread(kernel_task, nfsiod_thread);
/* wait for the thread to complete startup; PDROP drops the mutex */
msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
return (0);
}
/*
 * Continuation function for nfsiod threads.
 * Entered on wakeup (or idle timeout, error == EWOULDBLOCK) from the
 * msleep0() calls in nfsiod_thread() and below.  Pulls work off the
 * niod/mount queues, runs queued async request callbacks, then either
 * sleeps again (if under the thread cap) or terminates.
 */
static int
nfsiod_continue(int error)
{
struct nfsiod *niod;
struct nfsmount *nmp;
struct nfsreq *req, *treq;
struct nfs_reqqhead iodq;
int morework;
lck_mtx_lock(nfsiod_mutex);
niod = TAILQ_FIRST(&nfsiodwork);
if (!niod) {
/* not on the work queue: idle timeout or spurious wakeup; exit */
if (error != EWOULDBLOCK)
printf("nfsiod: error %d work %p\n", error, niod);
if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
nfsiod_terminate(niod);
/* NOTREACHED */
}
TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
worktodo:
while ((nmp = niod->niod_nmp)) {
/* grab this mount's entire async request queue, then service it
 * with the mutex dropped */
TAILQ_INIT(&iodq);
TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
lck_mtx_unlock(nfsiod_mutex);
TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
TAILQ_REMOVE(&iodq, req, r_achain);
req->r_achain.tqe_next = NFSREQNOLIST;
req->r_callback.rcb_func(req);
}
lck_mtx_lock(nfsiod_mutex);
morework = !TAILQ_EMPTY(&nmp->nm_iodq);
if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
/* requeue this mount and detach so other mounts get a turn */
if (morework)
TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
nmp->nm_niod = NULL;
niod->niod_nmp = NULL;
}
}
/* no mount assigned: pick up the next waiting mount, if any */
if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
}
if (niod->niod_nmp)
goto worktodo;
if (nfsiod_thread_count <= NFSIOD_MAX) {
/* go back to sleep on the free list; PDROP releases the mutex */
TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
/* reached only if msleep0() returns instead of continuing */
lck_mtx_lock(nfsiod_mutex);
if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
}
nfsiod_terminate(niod);
/* NOTREACHED */
return (0);
}
#endif
#if NFSSERVER
/*
 * getfh() system call: translate a pathname into an NFS file handle,
 * provided the path lies within an exported file system.  Superuser only.
 * Copies the resulting struct nfs_filehandle out to uap->fhp.
 */
int
getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	int error;
	struct nameidata nd;
	char path[MAXPATHLEN], *ptr;
	size_t pathlen;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;

	/* Must be super user. */
	error = proc_suser(p);
	if (error)
		return (error);

	/*
	 * Fix: pathlen was previously a u_int passed through a (size_t *)
	 * cast; on an LP64 kernel copyinstr() stores a full size_t (8 bytes)
	 * into the 4-byte object, corrupting the adjacent stack.  Use a real
	 * size_t so no cast is needed.
	 */
	error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
	if (error)
		return (error);

	if (!nfsrv_is_initialized())
		return (EINVAL);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
	error = namei(&nd);
	if (error)
		return (error);
	nameidone(&nd);
	vp = nd.ni_vp;

	/* Find the export: first match the exported file system by the
	 * mount point of the looked-up vnode... */
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
		if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN))
			break;
	}
	if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
		error = EINVAL;
		goto out;
	}
	/* ...then match the export subdirectory within that file system. */
	ptr = path + strlen(nxfs->nxfs_path);
	while (*ptr && (*ptr == '/'))
		ptr++;
	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
		int len = strlen(nx->nx_path);
		if (len == 0)	/* an empty export path matches everything */
			break;
		if (!strncmp(nx->nx_path, ptr, len))
			break;
	}
	if (!nx) {
		error = EINVAL;
		goto out;
	}

	/* Build the file handle: export header followed by the file
	 * system's file ID (from VFS_VPTOFH). */
	bzero(&nfh, sizeof(nfh));
	nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
	nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
	nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
	nfh.nfh_xh.nxh_flags = 0;
	nfh.nfh_xh.nxh_reserved = 0;
	nfh.nfh_len = NFSV3_MAX_FID_SIZE;
	error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
	if (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)
		error = EOVERFLOW;
	nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
	nfh.nfh_len += sizeof(nfh.nfh_xh);
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

out:
	lck_rw_done(&nfsrv_export_rwlock);
	vnode_put(vp);
	if (error)
		return (error);
	error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh));
	return (error);
}
extern struct fileops vnops;
/*
 * fhopen() system call: open a file by NFS file handle.  Superuser only.
 * Validates the handle, authorizes the requested access, opens the
 * vnode, allocates a file descriptor, and optionally takes an
 * O_EXLOCK/O_SHLOCK advisory lock.  Returns the new fd in *retval.
 */
int
fhopen( proc_t p,
struct fhopen_args *uap,
register_t *retval)
{
vnode_t vp;
struct nfs_filehandle nfh;
struct nfs_export *nx;
struct nfs_export_options *nxo;
struct flock lf;
struct fileproc *fp, *nfp;
int fmode, error, type;
int indx;
vfs_context_t ctx = vfs_context_current();
kauth_action_t action;
/* must be superuser to use a raw file handle */
error = suser(vfs_context_ucred(ctx), 0);
if (error) {
return (error);
}
if (!nfsrv_is_initialized()) {
return (EINVAL);
}
fmode = FFLAGS(uap->flags);
/* must request read and/or write access; cannot create */
if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
return (EINVAL);
/* copy in the handle length first, validate it, then the full handle */
error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
if (error)
return (error);
if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
(nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
return (EINVAL);
error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
if (error)
return (error);
nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
/* resolve the handle to a vnode (returns it with an iocount held) */
lck_rw_lock_shared(&nfsrv_export_rwlock);
error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
lck_rw_done(&nfsrv_export_rwlock);
if (error) {
/* note: both statements below are inside this if-block; any error
 * is returned here, with NFSERR_TRYLATER mapped to EAGAIN */
if (error == NFSERR_TRYLATER)
error = EAGAIN; return (error);
}
/* can't open sockets this way; no write/truncate opens on directories */
if (vnode_vtype(vp) == VSOCK) {
error = EOPNOTSUPP;
goto bad;
}
if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
error = EISDIR;
goto bad;
}
/* authorize the requested access against the vnode */
action = 0;
if (fmode & FREAD)
action |= KAUTH_VNODE_READ_DATA;
if (fmode & (FWRITE | O_TRUNC))
action |= KAUTH_VNODE_WRITE_DATA;
if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
goto bad;
if ((error = VNOP_OPEN(vp, fmode, ctx)))
goto bad;
if ((error = vnode_ref_ext(vp, fmode)))
goto bad;
/* allocate a file descriptor and wire it to the vnode */
if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
vn_close(vp, fmode & FMASK, ctx);
goto bad;
}
fp = nfp;
fp->f_fglob->fg_flag = fmode & FMASK;
fp->f_fglob->fg_type = DTYPE_VNODE;
fp->f_fglob->fg_ops = &vnops;
fp->f_fglob->fg_data = (caddr_t)vp;
if (fmode & (O_EXLOCK | O_SHLOCK)) {
lf.l_whence = SEEK_SET;
lf.l_start = 0;
lf.l_len = 0;
if (fmode & O_EXLOCK)
lf.l_type = F_WRLCK;
else
lf.l_type = F_RDLCK;
type = F_FLOCK;
if ((fmode & FNONBLOCK) == 0)
type |= F_WAIT;
if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) {
/* close with the fileglob's credential, not the thread's */
struct vfs_context context = *vfs_context_current();
context.vc_ucred = fp->f_fglob->fg_cred;
vn_close(vp, fp->f_fglob->fg_flag, &context);
fp_free(p, indx, fp);
/* NOTE(review): this path returns without vnode_put(vp) — it
 * looks like the iocount from nfsrv_fhtovp() is leaked here
 * unless vn_close() drops it; verify against vn_close(). */
return (error);
}
fp->f_fglob->fg_flag |= FHASLOCK;
}
/* drop the iocount; the fd's fg_data reference keeps the vnode */
vnode_put(vp);
/* publish the fd in the process's file table */
proc_fdlock(p);
procfdtbl_releasefd(p, indx, NULL);
fp_drop(p, indx, fp, 1);
proc_fdunlock(p);
*retval = indx;
return (0);
bad:
vnode_put(vp);
return (error);
}
/*
 * nfssvc system call: server-side NFS control.  Superuser only.
 * Dispatches on uap->flag:
 *   NFSSVC_ADDSOCK - hand a socket to the server (nfssvc_addsock())
 *   NFSSVC_NFSD    - become an nfsd service thread (nfssvc_nfsd())
 *   NFSSVC_EXPORT  - update the export list (nfssvc_export())
 * EINTR/ERESTART from the service loop are reported as success.
 */
int
nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
{
	struct user_nfsd_args args;
	socket_t sock;
	mbuf_t name;
	int error;

	AUDIT_ARG(cmd, uap->flag);

	/* only the superuser may control the NFS server */
	error = proc_suser(p);
	if (error)
		return (error);
#if CONFIG_MACF
	error = mac_system_check_nfsd(kauth_cred_get());
	if (error)
		return (error);
#endif

	/* make sure global NFS server state is initialized */
	nfsrv_init();

	if (uap->flag & NFSSVC_ADDSOCK) {
		/* fetch the nfsd args, widening from the 32-bit layout if needed */
		if (IS_64BIT_PROCESS(p)) {
			error = copyin(uap->argp, (caddr_t)&args, sizeof(args));
		} else {
			struct nfsd_args args32;
			error = copyin(uap->argp, (caddr_t)&args32, sizeof(args32));
			if (error == 0) {
				args.sock = args32.sock;
				args.name = CAST_USER_ADDR_T(args32.name);
				args.namelen = args32.namelen;
			}
		}
		if (error)
			return (error);

		error = file_socket(args.sock, &sock);
		if (error)
			return (error);

		/* grab the socket name, if one was supplied */
		if (args.name == USER_ADDR_NULL || args.namelen == 0) {
			name = NULL;
		} else {
			error = sockargs(&name, args.name, args.namelen, MBUF_TYPE_SONAME);
			if (error) {
				file_drop(args.sock);
				return (error);
			}
		}

		error = nfssvc_addsock(sock, name);
		/* drop the fd reference taken by file_socket() */
		file_drop(args.sock);
	} else if (uap->flag & NFSSVC_NFSD) {
		error = nfssvc_nfsd();
	} else if (uap->flag & NFSSVC_EXPORT) {
		error = nfssvc_export(uap->argp);
	} else {
		error = EINVAL;
	}

	/* interrupted service loops are not an error to user space */
	if ((error == EINTR) || (error == ERESTART))
		error = 0;
	return (error);
}
/*
 * Add a socket to the NFS server's socket list.
 * Configures the socket (keepalive/NODELAY/buffers/timeouts), allocates
 * and initializes a struct nfsrv_sock wrapper, wires up the receive
 * upcall, and wakes an nfsd to service it.  Consumes mynam (either
 * stored in the slp or freed on error).  Only one UDP socket is allowed.
 */
static int
nfssvc_addsock(socket_t so, mbuf_t mynam)
{
struct nfsrv_sock *slp;
int error = 0, sodomain, sotype, soprotocol, on = 1;
struct timeval timeo;
if (!nfs_mbuf_mhlen)
nfs_mbuf_init();
sock_gettype(so, &sodomain, &sotype, &soprotocol);
/* only allow one UDP socket */
if ((soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
mbuf_freem(mynam);
return (EEXIST);
}
if (sotype == SOCK_STREAM)
sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
if (sotype == SOCK_DGRAM) {
/* grow the UDP buffers; failures here are only logged, not fatal */
int reserve = NFS_UDPSOCKBUF;
error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
if (error) {
log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
error = 0;
}
}
sock_nointerrupt(so, 0);
/* short receive timeout, longer send timeout; failures only logged */
timeo.tv_usec = 0;
timeo.tv_sec = 1;
error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
timeo.tv_sec = 30;
error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
if (error) {
log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
error = 0;
}
MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
if (!slp) {
mbuf_freem(mynam);
return (ENOMEM);
}
bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
lck_mtx_lock(nfsd_mutex);
if (soprotocol == IPPROTO_UDP) {
/* re-check the UDP singleton now that we hold nfsd_mutex */
if (nfsrv_udpsock) {
lck_mtx_unlock(nfsd_mutex);
nfsrv_slpfree(slp);
mbuf_freem(mynam);
return (EEXIST);
}
nfsrv_udpsock = slp;
}
TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
/* hold a reference on the socket for the slp */
sock_retain(so);
slp->ns_so = so;
slp->ns_sotype = sotype;
slp->ns_nam = mynam;
/* install the receive upcall (requires the socket lock) */
socket_lock(so, 1);
so->so_upcallarg = (caddr_t)slp;
so->so_upcall = nfsrv_rcv;
so->so_rcv.sb_flags |= SB_UPCALL;
socket_unlock(so, 1);
sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
slp->ns_wgq.tqe_next = SLPNOLIST;
/* mark valid with pending receive work and wake an nfsd */
slp->ns_flag = SLP_VALID | SLP_NEEDQ;
nfsrv_wakenfsd(slp);
lck_mtx_unlock(nfsd_mutex);
return (0);
}
/*
 * Main loop of an nfsd service thread.
 * Registers this thread, then repeatedly: picks a socket with work from
 * the wait/work queues, dequeues a request record (nfsrv_dorec), runs
 * the RPC via nfsrv_procs[] (or write-gathering), and sends the reply.
 * Exits when interrupted, when the thread count exceeds nfsd_thread_max,
 * or when nfsd_thread_max drops to zero.
 */
static int
nfssvc_nfsd(void)
{
mbuf_t m, mrep;
struct nfsrv_sock *slp;
struct nfsd *nfsd;
struct nfsrv_descript *nd = NULL;
int error = 0, cacherep, writes_todo;
int siz, procrastinate, opcnt = 0;
u_quad_t cur_usec;
struct timeval now;
struct vfs_context context;
#ifndef nolint
/* quiet "may be used uninitialized" analysis */
cacherep = RC_DOIT;
writes_todo = 0;
#endif
MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
if (!nfsd)
return (ENOMEM);
bzero(nfsd, sizeof(struct nfsd));
/* register this thread; the first nfsd sets up the request cache */
lck_mtx_lock(nfsd_mutex);
if (nfsd_thread_count++ == 0)
nfsrv_initcache();
TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
lck_mtx_unlock(nfsd_mutex);
context.vc_thread = current_thread();
for (;;) {
if (nfsd_thread_max <= 0) {
/* server is being shut down */
error = EINTR;
slp = nfsd->nfsd_slp;
} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
/* already have a socket with a request in progress */
error = 0;
slp = nfsd->nfsd_slp;
} else {
/* find a socket with work to do */
error = 0;
lck_mtx_lock(nfsd_mutex);
while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
if (nfsd_thread_count > nfsd_thread_max) {
/* over the cap: this thread exits */
error = 0;
goto done;
}
nfsd->nfsd_flag |= NFSD_WAITING;
TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", NULL);
if (error) {
if (nfsd->nfsd_flag & NFSD_WAITING) {
TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
nfsd->nfsd_flag &= ~NFSD_WAITING;
}
goto done;
}
}
slp = nfsd->nfsd_slp;
/* prefer sockets on the "wait" queue (not yet being serviced) */
if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
lck_rw_lock_exclusive(&slp->ns_rwlock);
TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
slp->ns_flag &= ~SLP_WAITQ;
if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
break;
/* no work/invalid: drop lock and try the next one */
lck_rw_done(&slp->ns_rwlock);
}
}
/* otherwise help out with sockets already on the "work" queue */
if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
lck_rw_lock_exclusive(&slp->ns_rwlock);
TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
slp->ns_flag &= ~SLP_WORKQ;
if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
break;
lck_rw_done(&slp->ns_rwlock);
}
}
if (!nfsd->nfsd_slp && slp) {
/* adopt the socket: take a reference and requeue on the work queue */
slp->ns_sref++;
nfsd->nfsd_slp = slp;
opcnt = 0;
TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
slp->ns_flag |= SLP_WORKQ;
lck_rw_done(&slp->ns_rwlock);
}
lck_mtx_unlock(nfsd_mutex);
if (!slp)
continue;
lck_rw_lock_exclusive(&slp->ns_rwlock);
if (slp->ns_flag & SLP_VALID) {
if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) {
/* pull pending data off the socket */
slp->ns_flag &= ~SLP_NEEDQ;
nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
}
if (slp->ns_flag & SLP_DISCONN)
nfsrv_zapsock(slp);
/* dequeue the next request record from the socket */
error = nfsrv_dorec(slp, nfsd, &nd);
if (error == EINVAL) { if (slp->ns_sotype == SOCK_STREAM)
nfsrv_zapsock(slp); }	/* unrecoverable stream framing error */
writes_todo = 0;
if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
/* no new request, but gathered writes may be due */
microuptime(&now);
cur_usec = (u_quad_t)now.tv_sec * 1000000 +
(u_quad_t)now.tv_usec;
if (slp->ns_wgtime <= cur_usec) {
error = 0;
cacherep = RC_DOIT;
writes_todo = 1;
}
slp->ns_flag &= ~SLP_DOWRITES;
}
nfsd->nfsd_flag |= NFSD_REQINPROG;
}
lck_rw_done(&slp->ns_rwlock);
}
if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
/* nothing to do (or socket died): release everything and loop */
if (nd) {
nfsm_chain_cleanup(&nd->nd_nmreq);
if (nd->nd_nam2)
mbuf_freem(nd->nd_nam2);
if (IS_VALID_CRED(nd->nd_cr))
kauth_cred_unref(&nd->nd_cr);
FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
nd = NULL;
}
nfsd->nfsd_slp = NULL;
nfsd->nfsd_flag &= ~NFSD_REQINPROG;
if (slp)
nfsrv_slpderef(slp);
if (nfsd_thread_max <= 0)
break;
continue;
}
if (nd) {
microuptime(&nd->nd_starttime);
if (nd->nd_nam2)
nd->nd_nam = nd->nd_nam2;
else
nd->nd_nam = slp->ns_nam;
/* check the duplicate request cache */
cacherep = nfsrv_getcache(nd, slp, &mrep);
/* optionally reject requests from non-reserved ports */
if (nfsrv_require_resv_port) {
u_short port;
struct sockaddr *nam = mbuf_data(nd->nd_nam);
struct sockaddr_in *sin;
sin = (struct sockaddr_in *)nam;
port = ntohs(sin->sin_port);
if (port >= IPPORT_RESERVED &&
nd->nd_procnum != NFSPROC_NULL) {
char strbuf[MAX_IPv4_STR_LEN];
nd->nd_procnum = NFSPROC_NOOP;
nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
cacherep = RC_DOIT;
printf("NFS request from unprivileged port (%s:%d)\n",
inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)),
port);
}
}
}
/* loop to allow write-gathering to pick up pending writes */
do {
switch (cacherep) {
case RC_DOIT:
/* service the RPC (possibly via the write gatherer) */
if (nd && (nd->nd_vers == NFS_VER3))
procrastinate = nfsrv_wg_delay_v3;
else
procrastinate = nfsrv_wg_delay;
lck_rw_lock_shared(&nfsrv_export_rwlock);
context.vc_ucred = NULL;
if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
error = nfsrv_writegather(&nd, slp, &context, &mrep);
else
error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
lck_rw_done(&nfsrv_export_rwlock);
if (mrep == NULL) {
/* no reply to send; a stream error kills the socket */
if (error && slp->ns_sotype == SOCK_STREAM) {
lck_rw_lock_exclusive(&slp->ns_rwlock);
nfsrv_zapsock(slp);
lck_rw_done(&slp->ns_rwlock);
printf("NFS server: NULL reply from proc = %d error = %d\n",
nd->nd_procnum, error);
}
break;
}
if (error) {
OSAddAtomic(1, (SInt32*)&nfsstats.srv_errs);
nfsrv_updatecache(nd, FALSE, mrep);
if (nd->nd_nam2) {
mbuf_freem(nd->nd_nam2);
nd->nd_nam2 = NULL;
}
break;
}
OSAddAtomic(1, (SInt32*)&nfsstats.srvrpccnt[nd->nd_procnum]);
nfsrv_updatecache(nd, TRUE, mrep);
/* FALLTHRU: successfully built reply is sent via RC_REPLY */
case RC_REPLY:
/* if the reply was GSS-protected, finish it off */
if (nd->nd_gss_mb != NULL) {
error = nfs_gss_svc_protect_reply(nd, mrep);
if (error) {
mbuf_freem(mrep);
break;
}
}
/* total up the reply length and sanity-check it */
m = mrep;
siz = 0;
while (m) {
siz += mbuf_len(m);
m = mbuf_next(m);
}
if (siz <= 0 || siz > NFS_MAXPACKET) {
printf("mbuf siz=%d\n",siz);
panic("Bad nfs svc reply");
}
m = mrep;
mbuf_pkthdr_setlen(m, siz);
error = mbuf_pkthdr_setrcvif(m, NULL);
if (error)
panic("nfsd setrcvif failed: %d", error);
if (slp->ns_sotype == SOCK_STREAM) {
/* prepend the RPC record mark (last-fragment bit | length) */
error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
if (!error)
*(u_long*)mbuf_data(m) = htonl(0x80000000 | siz);
}
if (!error) {
if (slp->ns_flag & SLP_VALID) {
error = nfsrv_send(slp, nd->nd_nam2, m);
} else {
error = EPIPE;
mbuf_freem(m);
}
} else {
mbuf_freem(m);
}
mrep = NULL;
if (nd->nd_nam2) {
mbuf_freem(nd->nd_nam2);
nd->nd_nam2 = NULL;
}
if (error == EPIPE) {
/* peer went away: kill the socket */
lck_rw_lock_exclusive(&slp->ns_rwlock);
nfsrv_zapsock(slp);
lck_rw_done(&slp->ns_rwlock);
}
if (error == EINTR || error == ERESTART) {
/* interrupted: clean up and exit the thread */
nfsm_chain_cleanup(&nd->nd_nmreq);
if (IS_VALID_CRED(nd->nd_cr))
kauth_cred_unref(&nd->nd_cr);
FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
nfsrv_slpderef(slp);
lck_mtx_lock(nfsd_mutex);
goto done;
}
break;
case RC_DROPIT:
/* duplicate request: silently drop it */
mbuf_freem(nd->nd_nam2);
nd->nd_nam2 = NULL;
break;
};
opcnt++;
if (nd) {
nfsm_chain_cleanup(&nd->nd_nmreq);
if (nd->nd_nam2)
mbuf_freem(nd->nd_nam2);
if (IS_VALID_CRED(nd->nd_cr))
kauth_cred_unref(&nd->nd_cr);
FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
nd = NULL;
}
/* any gathered writes whose deadline has passed? */
writes_todo = 0;
if (slp->ns_wgtime) {
microuptime(&now);
cur_usec = (u_quad_t)now.tv_sec * 1000000 +
(u_quad_t)now.tv_usec;
if (slp->ns_wgtime <= cur_usec) {
cacherep = RC_DOIT;
writes_todo = 1;
}
}
} while (writes_todo);
nd = NULL;
/* if no other sockets are waiting and we haven't hogged this one,
 * try to pull another request off the same socket */
if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
lck_rw_lock_exclusive(&slp->ns_rwlock);
error = nfsrv_dorec(slp, nfsd, &nd);
if (error == EINVAL) { if (slp->ns_sotype == SOCK_STREAM)
nfsrv_zapsock(slp); }
lck_rw_done(&slp->ns_rwlock);
}
if (!nd) {
/* done with this socket for now */
nfsd->nfsd_flag &= ~NFSD_REQINPROG;
nfsd->nfsd_slp = NULL;
nfsrv_slpderef(slp);
}
}
lck_mtx_lock(nfsd_mutex);
done:
/* deregister; the last nfsd tears down global server state */
TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
FREE(nfsd, M_NFSD);
if (--nfsd_thread_count == 0)
nfsrv_cleanup();
lck_mtx_unlock(nfsd_mutex);
return (error);
}
/*
 * Handle NFSSVC_EXPORT: copy in an nfs_export_args structure (widening
 * from the 32-bit layout when the caller is a 32-bit process) and hand
 * it to nfsrv_export() to update the export list.
 */
static int
nfssvc_export(user_addr_t argp)
{
	struct user_nfs_export_args args;
	vfs_context_t ctx = vfs_context_current();
	int error;

	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
		error = copyin(argp, (caddr_t)&args, sizeof(args));
	} else {
		/* 32-bit process: widen the structure field by field */
		struct nfs_export_args args32;
		error = copyin(argp, (caddr_t)&args32, sizeof(args32));
		if (error == 0) {
			args.nxa_fsid = args32.nxa_fsid;
			args.nxa_expid = args32.nxa_expid;
			args.nxa_fspath = CAST_USER_ADDR_T(args32.nxa_fspath);
			args.nxa_exppath = CAST_USER_ADDR_T(args32.nxa_exppath);
			args.nxa_flags = args32.nxa_flags;
			args.nxa_netcount = args32.nxa_netcount;
			args.nxa_nets = CAST_USER_ADDR_T(args32.nxa_nets);
		}
	}
	if (error)
		return (error);

	return (nfsrv_export(&args, ctx));
}
/*
 * Shut down an NFS server socket: clear its state flags, detach the
 * receive upcall, and shut the socket down in both directions.
 * Call sites in this file hold the slp's ns_rwlock exclusively.
 * No-op if the socket is not marked valid or has no socket attached.
 */
static void
nfsrv_zapsock(struct nfsrv_sock *slp)
{
	socket_t sock;

	if (!(slp->ns_flag & SLP_VALID))
		return;

	/* clear all state flags before touching the socket itself */
	slp->ns_flag &= ~SLP_ALLFLAGS;

	sock = slp->ns_so;
	if (sock == NULL)
		return;

	/* disable the receive upcall so no new work gets queued */
	socket_lock(sock, 1);
	sock->so_rcv.sb_flags &= ~SB_UPCALL;
	socket_unlock(sock, 1);

	sock_shutdown(sock, SHUT_RDWR);
}
/*
 * Free all resources held by a struct nfsrv_sock and the structure
 * itself: the socket reference, name/record/fragment mbufs, any queued
 * write-gather request descriptors, and the embedded locks.
 * Caller must have already removed slp from all server queues.
 */
static void
nfsrv_slpfree(struct nfsrv_sock *slp)
{
struct nfsrv_descript *nwp, *nnwp;
if (slp->ns_so) {
sock_release(slp->ns_so);
slp->ns_so = NULL;
}
if (slp->ns_nam)
mbuf_free(slp->ns_nam);
if (slp->ns_raw)
mbuf_freem(slp->ns_raw);
if (slp->ns_rec)
mbuf_freem(slp->ns_rec);
if (slp->ns_frag)
mbuf_freem(slp->ns_frag);
slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
slp->ns_reccnt = 0;
/* free any request descriptors still queued for write gathering */
for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
nnwp = nwp->nd_tq.le_next;
LIST_REMOVE(nwp, nd_tq);
nfsm_chain_cleanup(&nwp->nd_nmreq);
if (nwp->nd_mrep)
mbuf_freem(nwp->nd_mrep);
if (nwp->nd_nam2)
mbuf_freem(nwp->nd_nam2);
if (IS_VALID_CRED(nwp->nd_cr))
kauth_cred_unref(&nwp->nd_cr);
FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
}
LIST_INIT(&slp->ns_tq);
lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
FREE(slp, M_NFSSVC);
}
/*
 * Drop a reference on an NFS server socket.
 * If references remain or the socket is still valid, just dequeue it
 * when it has no pending work.  Otherwise move it from the active
 * socket list to the dead list, where nfsrv_deadsock_timer() will free
 * it after NFSRV_DEADSOCKDELAY seconds.
 * Lock order: nfsd_mutex, then the slp's ns_rwlock.
 */
void
nfsrv_slpderef(struct nfsrv_sock *slp)
{
struct timeval now;
lck_mtx_lock(nfsd_mutex);
lck_rw_lock_exclusive(&slp->ns_rwlock);
slp->ns_sref--;
if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
/* still referenced or still usable: just dequeue if idle */
if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
if (slp->ns_flag & SLP_WAITQ)
TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
else
TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
slp->ns_flag &= ~SLP_QUEUED;
}
lck_rw_done(&slp->ns_rwlock);
lck_mtx_unlock(nfsd_mutex);
return;
}
/* last reference on a dead socket: remove from any service queue */
if (slp->ns_flag & SLP_QUEUED) {
if (slp->ns_flag & SLP_WAITQ)
TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
else
TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
slp->ns_flag &= ~SLP_QUEUED;
}
/* timestamp it and move it to the dead list for delayed freeing */
microuptime(&now);
slp->ns_timestamp = now.tv_sec;
TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
if (!nfsrv_deadsock_timer_on) {
nfsrv_deadsock_timer_on = 1;
nfs_interval_timer_start(nfsrv_deadsock_timer_call,
NFSRV_DEADSOCKDELAY * 1000);
}
lck_rw_done(&slp->ns_rwlock);
/* remove from the write-gather queue (still under nfsd_mutex) */
if (slp->ns_wgq.tqe_next != SLPNOLIST) {
TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
slp->ns_wgq.tqe_next = SLPNOLIST;
}
lck_mtx_unlock(nfsd_mutex);
}
/*
 * Timer callback that frees dead server sockets.
 * Frees every socket that has been on the dead list for at least
 * NFSRV_DEADSOCKDELAY seconds; if any remain, re-arms the timer for
 * the first not-yet-expired entry (which `slp` points at after the
 * loop's break).
 */
void
nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
{
struct nfsrv_sock *slp;
struct timeval now;
time_t time_to_wait;
microuptime(&now);
lck_mtx_lock(nfsd_mutex);
/* the list is in timestamp order, so stop at the first unexpired one */
while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
break;
TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
nfsrv_slpfree(slp);
}
if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
nfsrv_deadsock_timer_on = 0;
lck_mtx_unlock(nfsd_mutex);
return;
}
/* re-arm for the next socket's expiry (at least 1 second out) */
time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
if (time_to_wait < 1)
time_to_wait = 1;
lck_mtx_unlock(nfsd_mutex);
nfs_interval_timer_start(nfsrv_deadsock_timer_call,
time_to_wait * 1000);
}
/*
 * Tear down global NFS server state when the last nfsd thread exits
 * (called from nfssvc_nfsd() with nfsd_mutex held).  Zaps every server
 * socket and moves it to the dead list, flushes the pending
 * write-gather file-modification records (posting fsevents if enabled),
 * and cleans up the GSS and request caches.
 */
void
nfsrv_cleanup(void)
{
struct nfsrv_sock *slp, *nslp;
struct timeval now;
struct nfsrv_fmod *fp, *nfp;
int i;
microuptime(&now);
for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
nslp = TAILQ_NEXT(slp, ns_chain);
if (slp->ns_flag & SLP_VALID) {
lck_rw_lock_exclusive(&slp->ns_rwlock);
nfsrv_zapsock(slp);
lck_rw_done(&slp->ns_rwlock);
}
/* pull the socket off any service queue */
if (slp->ns_flag & SLP_QUEUED) {
if (slp->ns_flag & SLP_WAITQ)
TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
else
TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
slp->ns_flag &= ~SLP_QUEUED;
}
/* and off the write-gather queue */
if (slp->ns_wgq.tqe_next != SLPNOLIST) {
TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
slp->ns_wgq.tqe_next = SLPNOLIST;
}
/* move it to the dead list for delayed freeing */
slp->ns_timestamp = now.tv_sec;
TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
if (!nfsrv_deadsock_timer_on) {
nfsrv_deadsock_timer_on = 1;
nfs_interval_timer_start(nfsrv_deadsock_timer_call,
NFSRV_DEADSOCKDELAY * 1000);
}
}
/* flush pending file-modification records (write gathering) */
lck_mtx_lock(nfsrv_fmod_mutex);
for (i = 0; i < NFSRVFMODHASHSZ; i++) {
for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
#if CONFIG_FSE
/* post the deferred content-modified event */
if (nfsrv_fsevents_enabled)
add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
FSE_ARG_VNODE, fp->fm_vp,
FSE_ARG_DONE);
#endif
vnode_put(fp->fm_vp);
kauth_cred_unref(&fp->fm_context.vc_ucred);
nfp = LIST_NEXT(fp, fm_link);
LIST_REMOVE(fp, fm_link);
FREE(fp, M_TEMP);
}
}
nfsrv_fmod_pending = 0;
lck_mtx_unlock(nfsrv_fmod_mutex);
nfs_gss_svc_cleanup();
nfsrv_cleancache();
nfsrv_udpsock = NULL;
}
#endif