#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/ubc.h>
#include <sys/vm.h>
#include <sys/vmparam.h>
#include <sys/time.h>
#include <kern/clock.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfsnode.h>
#include <sys/kdebug.h>
/*
 * File-system read/write trace points.
 *
 * Each wrapper hands KERNEL_DEBUG a debug id built from
 * FSDBG_CODE(DBG_FSRW, A) OR-ed with a DBG_FUNC_* qualifier, followed by
 * the four caller-supplied values (each cast to int) and a constant 0.
 * The three public macros differ only in the qualifier they select.
 */
#define FSDBG_EMIT(QUAL, A, B, C, D, E) \
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | (QUAL), \
		(int)(B), (int)(C), (int)(D), (int)(E), 0)

/* Event tagged DBG_FUNC_NONE. */
#define FSDBG(A, B, C, D, E) \
	FSDBG_EMIT(DBG_FUNC_NONE, A, B, C, D, E)

/* Event tagged DBG_FUNC_START. */
#define FSDBG_TOP(A, B, C, D, E) \
	FSDBG_EMIT(DBG_FUNC_START, A, B, C, D, E)

/* Event tagged DBG_FUNC_END. */
#define FSDBG_BOT(A, B, C, D, E) \
	FSDBG_EMIT(DBG_FUNC_END, A, B, C, D, E)
/*
 * Forward declaration of the file-local helper that obtains the buffer for
 * block bn of vp; "operation" is passed through to getblk().
 */
static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
struct proc *p, int operation));
/*
 * Globals defined outside this file.
 * nfs_numasync: when 0, nfs_asyncio() refuses the request and read-ahead is
 *   not attempted (presumably the number of async I/O daemons -- confirm).
 * nfsstats: NFS statistics counters bumped by the routines in this file.
 * nbdwrite: incremented in nfs_doio() when a buffer is newly marked
 *   B_DELWRI; compared against nbuf in nfs_getcacheblk().
 */
extern int nfs_numasync;
extern struct nfsstats nfsstats;
extern int nbdwrite;
int
nfs_bioread(vp, uio, ioflag, cred, getpages)
register struct vnode *vp;
register struct uio *uio;
int ioflag;
struct ucred *cred;
int getpages;
{
register struct nfsnode *np = VTONFS(vp);
register int biosize, i;
off_t diff;
struct buf *bp = 0, *rabp;
struct vattr vattr;
struct proc *p;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
daddr_t lbn, rabn;
int bufsize;
int nra, error = 0, n = 0, on = 0, not_readin;
int operation = (getpages? BLK_PAGEIN : BLK_READ);
#if DIAGNOSTIC
if (uio->uio_rw != UIO_READ)
panic("nfs_read mode");
#endif
if (uio->uio_resid == 0)
return (0);
if (uio->uio_offset < 0)
return (EINVAL);
p = uio->uio_procp;
if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
(void)nfs_fsinfo(nmp, vp, cred, p);
biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
if (np->n_flag & NMODIFIED) {
if (vp->v_type != VREG) {
if (vp->v_type != VDIR)
panic("nfs: bioread, not dir");
nfs_invaldir(vp);
error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
if (error)
return (error);
}
np->n_attrstamp = 0;
error = VOP_GETATTR(vp, &vattr, cred, p);
if (error)
return (error);
np->n_mtime = vattr.va_mtime.tv_sec;
} else {
error = VOP_GETATTR(vp, &vattr, cred, p);
if (error)
return (error);
if (np->n_mtime != vattr.va_mtime.tv_sec) {
if (vp->v_type == VDIR)
nfs_invaldir(vp);
error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
if (error)
return (error);
np->n_mtime = vattr.va_mtime.tv_sec;
}
}
}
do {
if (nmp->nm_flag & NFSMNT_NQNFS) {
if (NQNFS_CKINVALID(vp, np, ND_READ)) {
do {
error = nqnfs_getlease(vp, ND_READ, cred, p);
} while (error == NQNFS_EXPIRED);
if (error)
return (error);
if (np->n_lrev != np->n_brev ||
(np->n_flag & NQNFSNONCACHE) ||
((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
if (vp->v_type == VDIR)
nfs_invaldir(vp);
error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
if (error)
return (error);
np->n_brev = np->n_lrev;
}
} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
nfs_invaldir(vp);
error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
if (error)
return (error);
}
}
if (np->n_flag & NQNFSNONCACHE) {
switch (vp->v_type) {
case VREG:
return (nfs_readrpc(vp, uio, cred));
case VLNK:
return (nfs_readlinkrpc(vp, uio, cred));
case VDIR:
break;
default:
printf(" NQNFSNONCACHE: type %x unexpected\n",
vp->v_type);
};
}
switch (vp->v_type) {
case VREG:
nfsstats.biocache_reads++;
lbn = uio->uio_offset / biosize;
on = uio->uio_offset & (biosize - 1);
not_readin = 1;
if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
for (nra = 0; nra < nmp->nm_readahead &&
(off_t)(lbn + 1 + nra) * biosize < np->n_size;
nra++) {
rabn = lbn + 1 + nra;
if (!incore(vp, rabn)) {
rabp = nfs_getcacheblk(vp, rabn, biosize, p, operation);
if (!rabp)
return (EINTR);
if (!ISSET(rabp->b_flags, (B_CACHE|B_DELWRI))) {
SET(rabp->b_flags, (B_READ | B_ASYNC));
if (nfs_asyncio(rabp, cred)) {
SET(rabp->b_flags, (B_INVAL|B_ERROR));
rabp->b_error = EIO;
brelse(rabp);
}
} else
brelse(rabp);
}
}
}
again:
bufsize = biosize;
if ((off_t)(lbn + 1) * biosize > np->n_size &&
(off_t)(lbn + 1) * biosize - np->n_size < biosize) {
bufsize = np->n_size - (off_t)lbn * biosize;
bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
}
bp = nfs_getcacheblk(vp, lbn, bufsize, p, operation);
if (!bp)
return (EINTR);
if (!ISSET(bp->b_flags, B_CACHE)) {
SET(bp->b_flags, B_READ);
CLR(bp->b_flags, (B_DONE | B_ERROR | B_INVAL));
not_readin = 0;
error = nfs_doio(bp, cred, p);
if (error) {
brelse(bp);
return (error);
}
}
if (bufsize > on) {
n = min((unsigned)(bufsize - on), uio->uio_resid);
} else {
n = 0;
}
diff = np->n_size - uio->uio_offset;
if (diff < n)
n = diff;
if (not_readin && n > 0) {
if (on < bp->b_validoff || (on + n) > bp->b_validend) {
SET(bp->b_flags, (B_NOCACHE|B_INVAFTERWRITE));
if (bp->b_dirtyend > 0) {
if (!ISSET(bp->b_flags, B_DELWRI))
panic("nfsbioread");
if (VOP_BWRITE(bp) == EINTR)
return (EINTR);
} else
brelse(bp);
goto again;
}
}
vp->v_lastr = lbn;
diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
if (diff < n)
n = diff;
break;
case VLNK:
nfsstats.biocache_readlinks++;
bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p, operation);
if (!bp)
return (EINTR);
if (!ISSET(bp->b_flags, B_CACHE)) {
SET(bp->b_flags, B_READ);
error = nfs_doio(bp, cred, p);
if (error) {
SET(bp->b_flags, B_ERROR);
brelse(bp);
return (error);
}
}
n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
on = 0;
break;
case VDIR:
nfsstats.biocache_readdirs++;
if (np->n_direofoffset
&& uio->uio_offset >= np->n_direofoffset) {
return (0);
}
lbn = uio->uio_offset / NFS_DIRBLKSIZ;
on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p, operation);
if (!bp)
return (EINTR);
if (!ISSET(bp->b_flags, B_CACHE)) {
SET(bp->b_flags, B_READ);
error = nfs_doio(bp, cred, p);
if (error) {
brelse(bp);
}
while (error == NFSERR_BAD_COOKIE) {
nfs_invaldir(vp);
error = nfs_vinvalbuf(vp, 0, cred, p, 1);
for (i = 0; i <= lbn && !error; i++) {
if (np->n_direofoffset
&& (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
return (0);
bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p,
operation);
if (!bp)
return (EINTR);
if (!ISSET(bp->b_flags, B_CACHE)) {
SET(bp->b_flags, B_READ);
error = nfs_doio(bp, cred, p);
if (error == 0 && (bp->b_flags & B_INVAL))
break;
}
if (error || i < lbn)
brelse(bp);
}
}
if (error)
return (error);
}
if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
(np->n_direofoffset == 0 ||
(lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
!(np->n_flag & NQNFSNONCACHE) &&
!incore(vp, lbn + 1)) {
rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p,
operation);
if (rabp) {
if (!ISSET(rabp->b_flags, (B_CACHE|B_DELWRI))) {
SET(rabp->b_flags, (B_READ | B_ASYNC));
if (nfs_asyncio(rabp, cred)) {
SET(rabp->b_flags, (B_INVAL|B_ERROR));
rabp->b_error = EIO;
brelse(rabp);
}
} else {
brelse(rabp);
}
}
}
n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
if (np->n_direofoffset &&
n > np->n_direofoffset - uio->uio_offset)
n = np->n_direofoffset - uio->uio_offset;
break;
default:
printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
break;
};
if (n > 0) {
error = uiomove(bp->b_data + on, (int)n, uio);
}
switch (vp->v_type) {
case VREG:
break;
case VLNK:
n = 0;
break;
case VDIR:
if (np->n_flag & NQNFSNONCACHE)
SET(bp->b_flags, B_INVAL);
break;
default:
printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
}
brelse(bp);
} while (error == 0 && uio->uio_resid > 0 && n > 0);
return (error);
}
/*
 * nfs_write: write to a regular NFS file through the buffer cache.
 *
 * ap supplies the vnode (a_vp), the uio describing the data to write
 * (a_uio), the caller's credentials (a_cred) and the I/O flags (a_ioflag).
 *
 * Returns 0 on success; EIO for a vnode that is not VREG; the error saved
 * in np->n_error when NWRITEERR is set on entry; EINVAL for a negative
 * offset; EFBIG (after posting SIGXFSZ) when the write would exceed the
 * process's RLIMIT_FSIZE; EINTR when a cache buffer cannot be obtained; or
 * the error from the lease, invalidate, read-RPC, uiomove() or write step
 * that failed.
 */
int
nfs_write(ap)
struct vop_write_args *ap;
{
register int biosize;
register struct uio *uio = ap->a_uio;
struct proc *p = uio->uio_procp;
register struct vnode *vp = ap->a_vp;
struct nfsnode *np = VTONFS(vp);
register struct ucred *cred = ap->a_cred;
int ioflag = ap->a_ioflag;
struct buf *bp;
struct vattr vattr;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
daddr_t lbn;
int bufsize;
int n, on, error = 0, iomode, must_commit;
off_t boff;
struct iovec iov;
struct uio auio;
#if DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
panic("nfs_write mode");
if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != current_proc())
panic("nfs_write proc");
#endif
if (vp->v_type != VREG)
return (EIO);
/* Report, once, a write error recorded earlier (nfs_doio() sets NWRITEERR / n_error). */
if (np->n_flag & NWRITEERR) {
np->n_flag &= ~NWRITEERR;
return (np->n_error);
}
/* NFSv3 mount without NFSMNT_GOTFSINFO: call nfs_fsinfo(); its result is ignored. */
if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
(void)nfs_fsinfo(nmp, vp, cred, p);
/*
 * Append or synchronous write: if the node is marked modified, flush and
 * invalidate its buffers first.  For append, refetch the attributes and
 * start writing at the current file size.
 */
if (ioflag & (IO_APPEND | IO_SYNC)) {
if (np->n_flag & NMODIFIED) {
np->n_attrstamp = 0;
error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
if (error)
return (error);
}
if (ioflag & IO_APPEND) {
np->n_attrstamp = 0;
error = VOP_GETATTR(vp, &vattr, cred, p);
if (error)
return (error);
uio->uio_offset = np->n_size;
}
}
if (uio->uio_offset < 0)
return (EINVAL);
if (uio->uio_resid == 0)
return (0);
/* Enforce the file-size resource limit (only when a process is attached to the uio). */
if (p && uio->uio_offset + uio->uio_resid >
p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
psignal(p, SIGXFSZ);
return (EFBIG);
}
/* Cache block size: the mount's I/O size, capped at PAGE_SIZE. */
biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
/* Each iteration writes at most one cache block's worth of data. */
do {
/*
 * NQNFS mounts: get a write lease when NQNFS_CKINVALID says one is
 * needed (retrying while it reports NQNFS_EXPIRED) and invalidate the
 * cache if the lease revision changed or the node is non-cacheable.
 */
if ((nmp->nm_flag & NFSMNT_NQNFS) &&
NQNFS_CKINVALID(vp, np, ND_WRITE)) {
do {
error = nqnfs_getlease(vp, ND_WRITE, cred, p);
} while (error == NQNFS_EXPIRED);
if (error)
return (error);
if (np->n_lrev != np->n_brev ||
(np->n_flag & NQNFSNONCACHE)) {
error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
if (error)
return (error);
np->n_brev = np->n_lrev;
}
}
/* Non-cacheable node and a single iovec: write straight through with a FILESYNC RPC. */
if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
iomode = NFSV3WRITE_FILESYNC;
error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
if (must_commit)
nfs_clearcommit(vp->v_mount);
return (error);
}
nfsstats.biocache_writes++;
/* Block number, offset within the block, and byte count for this pass. */
lbn = uio->uio_offset / biosize;
on = uio->uio_offset & (biosize-1);
n = min((unsigned)(biosize - on), uio->uio_resid);
again:
bufsize = biosize;
/* Disabled code: would have shrunk the buffer for the block containing EOF. */
#if 0
if ((lbn + 1) * biosize > np->n_size) {
bufsize = np->n_size - lbn * biosize;
bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
}
#endif
bp = nfs_getcacheblk(vp, lbn, bufsize, p, BLK_WRITE);
if (!bp)
return (EINTR);
/*
 * The write extends the file: record the new size in the nfsnode and in
 * UBC.  If the old EOF fell inside an earlier block (bn < lbn) that is in
 * core, zero that block from the old EOF to its end and mark it valid up
 * to biosize.
 */
if (uio->uio_offset + n > np->n_size) {
struct buf *bp0 = NULL;
daddr_t bn = np->n_size / biosize;
int off = np->n_size & (biosize - 1);
if (off && bn < lbn && incore(vp, bn))
bp0 = nfs_getcacheblk(vp, bn, biosize, p,
BLK_WRITE);
np->n_flag |= NMODIFIED;
np->n_size = uio->uio_offset + n;
ubc_setsize(vp, (off_t)np->n_size);
if (bp0) {
bzero((char *)bp0->b_data + off, biosize - off);
bp0->b_validend = biosize;
brelse(bp0);
}
}
/* Give the buffer a copy of the write credentials if it has none. */
if (bp->b_wcred == NOCRED)
bp->b_wcred = crdup(cred);
/* Clip the dirty region so that it does not extend past the file size. */
if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend >
np->n_size)
bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno *
DEV_BSIZE;
/*
 * Buffer did not come from the cache and the write covers less than a
 * page: read PAGE_SIZE bytes from the server into the buffer.  A read
 * error is only recorded on the buffer here (B_ERROR / b_error); it is
 * acted on by the B_ERROR check further down.
 */
if (!ISSET(bp->b_flags, B_CACHE) && n < PAGE_SIZE) {
boff = (off_t)bp->b_blkno * DEV_BSIZE;
auio.uio_iov = &iov;
auio.uio_iovcnt = 1;
auio.uio_offset = boff;
auio.uio_resid = PAGE_SIZE;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_rw = UIO_READ;
auio.uio_procp = p;
iov.iov_base = bp->b_data;
iov.iov_len = PAGE_SIZE;
error = nfs_readrpc(vp, &auio, cred);
if (error) {
bp->b_error = error;
SET(bp->b_flags, B_ERROR);
printf("nfs_write: readrpc %d", error);
}
/* Zero auio.uio_resid bytes starting at iov.iov_base, i.e. the part the read left unfilled. */
if (auio.uio_resid > 0)
bzero(iov.iov_base, auio.uio_resid);
bp->b_validoff = 0;
bp->b_validend = PAGE_SIZE - auio.uio_resid;
/* If the file is larger than what was read, extend the valid end to min(file size - boff, PAGE_SIZE). */
if (np->n_size > boff + bp->b_validend)
bp->b_validend = min(np->n_size - boff,
PAGE_SIZE);
bp->b_dirtyoff = 0;
bp->b_dirtyend = 0;
}
/*
 * The buffer already has a dirty region and the new write does not touch
 * it.  Read the gap between the valid region and the new write from the
 * server (when the gap is non-empty), then stretch the valid and dirty
 * regions so that they adjoin the new write.
 */
if (bp->b_dirtyend > 0 &&
(on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
off_t start, end;
boff = (off_t)bp->b_blkno * DEV_BSIZE;
if (on > bp->b_dirtyend) {
/* New write lies after the dirty region: gap is [validend, on). */
start = boff + bp->b_validend;
end = boff + on;
} else {
/* New write lies before the dirty region: gap is [on + n, validoff). */
start = boff + on + n;
end = boff + bp->b_validoff;
}
if (end > start) {
auio.uio_iov = &iov;
auio.uio_iovcnt = 1;
auio.uio_offset = start;
auio.uio_resid = end - start;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_rw = UIO_READ;
auio.uio_procp = p;
iov.iov_base = bp->b_data + (start - boff);
iov.iov_len = end - start;
error = nfs_readrpc(vp, &auio, cred);
if (error) {
bp->b_error = error;
SET(bp->b_flags, B_ERROR);
printf("nfs_write: readrpc2 %d", error);
brelse(bp);
return (error);
}
if (auio.uio_resid > 0)
bzero(iov.iov_base, auio.uio_resid);
if (on > bp->b_dirtyend)
bp->b_validend = on;
else
bp->b_validoff = on + n;
}
if (on > bp->b_dirtyend)
bp->b_dirtyend = on;
else
bp->b_dirtyoff = on + n;
}
/* Give up if the buffer carries an error (e.g. from the fill read above). */
if (ISSET(bp->b_flags, B_ERROR)) {
error = bp->b_error;
brelse(bp);
return (error);
}
if (bp->b_wcred == NOCRED)
bp->b_wcred = crdup(cred);
np->n_flag |= NMODIFIED;
/*
 * Re-check the NQNFS lease now that the buffer is held.  If the cache
 * has to be invalidated, release the buffer, flush, and start this block
 * over at "again".
 */
if ((nmp->nm_flag & NFSMNT_NQNFS) &&
NQNFS_CKINVALID(vp, np, ND_WRITE)) {
do {
error = nqnfs_getlease(vp, ND_WRITE, cred, p);
} while (error == NQNFS_EXPIRED);
if (error) {
brelse(bp);
return (error);
}
if (np->n_lrev != np->n_brev ||
(np->n_flag & NQNFSNONCACHE)) {
brelse(bp);
error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
if (error)
return (error);
np->n_brev = np->n_lrev;
goto again;
}
}
/* Copy the caller's data into the buffer. */
error = uiomove((char *)bp->b_data + on, n, uio);
if (error) {
SET(bp->b_flags, B_ERROR);
brelse(bp);
return (error);
}
/* Merge [on, on + n) into the dirty region, or start a new one. */
if (bp->b_dirtyend > 0) {
bp->b_dirtyoff = min(on, bp->b_dirtyoff);
bp->b_dirtyend = max((on + n), bp->b_dirtyend);
} else {
bp->b_dirtyoff = on;
bp->b_dirtyend = on + n;
}
/*
 * Update the valid region: if there is none, or it is disjoint from the
 * dirty region, it becomes the dirty region; otherwise take the union.
 */
if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
bp->b_validoff > bp->b_dirtyend) {
bp->b_validoff = bp->b_dirtyoff;
bp->b_validend = bp->b_dirtyend;
} else {
bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
}
/* The buffer was just modified, so clear B_NEEDCOMMIT. */
CLR(bp->b_flags, B_NEEDCOMMIT);
/*
 * Choose how to push the buffer:
 *  - non-cacheable node or IO_SYNC: write it now via VOP_BWRITE (and, if
 *    non-cacheable, invalidate the cache afterwards);
 *  - the write reached the end of the block on a non-NQNFS mount: start an
 *    asynchronous write via nfs_writebp();
 *  - otherwise: delayed write.
 */
if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
bp->b_proc = p;
error = VOP_BWRITE(bp);
if (error)
return (error);
if (np->n_flag & NQNFSNONCACHE) {
error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
if (error)
return (error);
}
} else if ((n + on) == biosize &&
(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
bp->b_proc = (struct proc *)0;
SET(bp->b_flags, B_ASYNC);
(void)nfs_writebp(bp, 0);
} else
bdwrite(bp);
} while (uio->uio_resid > 0 && n > 0);
return (0);
}
/*
 * nfs_getcacheblk: obtain the buffer for block bn of vp.
 *
 *	vp	  - vnode the block belongs to
 *	bn	  - block number handed to getblk()
 *	size	  - buffer size handed to getblk()
 *	p	  - process checked for pending signals on interruptible mounts
 *	operation - BLK_* code forwarded to getblk(); BLK_WRITE additionally
 *		    enables the throttle below
 *
 * Returns the buffer, or a null pointer when the mount is interruptible
 * (NFSMNT_INT) and nfs_sigintr() reports that the wait should be abandoned.
 * For regular files b_blkno is set to the block's byte offset expressed in
 * DEV_BSIZE units.
 */
static struct buf *
nfs_getcacheblk(vp, bn, size, p, operation)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
	int operation;
{
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
	register struct buf *bp;

	/*
	 * Writer throttle: once nbdwrite exceeds three quarters of nbuf,
	 * take a couple of empty 512-byte buffers, sleep briefly on
	 * &nbdwrite (timeout argument 1, result ignored) and give the
	 * buffers back in reverse order.
	 */
	if (operation == BLK_WRITE && nbdwrite > ((nbuf/4)*3)) {
#define __BUFFERS_RECLAIMED 2
		struct buf *held[__BUFFERS_RECLAIMED];
		int k;

		for (k = 0; k < __BUFFERS_RECLAIMED; k++)
			held[k] = geteblk(512);
		(void)tsleep((caddr_t)&nbdwrite, PCATCH, "nbdwrite", 1);
		k = __BUFFERS_RECLAIMED;
		while (k-- > 0)
			brelse(held[k]);
	}

	if ((nmp->nm_flag & NFSMNT_INT) == 0) {
		/* Not interruptible: a plain getblk() with no timeout. */
		bp = getblk(vp, bn, size, 0, 0, operation);
	} else {
		/*
		 * Interruptible mount: first try with PCATCH; after each
		 * failure check for a signal and otherwise retry with a
		 * 2 * hz timeout.
		 */
		for (bp = getblk(vp, bn, size, PCATCH, 0, operation);
		     bp == (struct buf *)0;
		     bp = getblk(vp, bn, size, 0, 2 * hz, operation)) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
		}
	}

	if (vp->v_type == VREG)
		bp->b_blkno = ((off_t)bn * biosize) / DEV_BSIZE;
	return (bp);
}
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
struct vnode *vp;
int flags;
struct ucred *cred;
struct proc *p;
int intrflg;
{
register struct nfsnode *np = VTONFS(vp);
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
int error = 0, slpflag, slptimeo;
int didhold = 0;
if ((nmp->nm_flag & NFSMNT_INT) == 0)
intrflg = 0;
if (intrflg) {
slpflag = PCATCH;
slptimeo = 2 * hz;
} else {
slpflag = 0;
slptimeo = 0;
}
while (np->n_flag & NFLUSHINPROG) {
np->n_flag |= NFLUSHWANT;
error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
slptimeo);
if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
return (EINTR);
}
np->n_flag |= NFLUSHINPROG;
error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
while (error) {
if ((intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) ||
(error == EINTR && current_thread_aborted())) {
np->n_flag &= ~NFLUSHINPROG;
if (np->n_flag & NFLUSHWANT) {
np->n_flag &= ~NFLUSHWANT;
wakeup((caddr_t)&np->n_flag);
}
return (EINTR);
}
error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
}
np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
if (np->n_flag & NFLUSHWANT) {
np->n_flag &= ~NFLUSHWANT;
wakeup((caddr_t)&np->n_flag);
}
didhold = ubc_hold(vp);
if (didhold) {
(void) ubc_clean(vp, 1);
ubc_rele(vp);
}
return (0);
}
/*
 * nfs_asyncio: queue a buffer on its mount point for an async I/O daemon.
 *
 *	bp   - buffer to queue; B_READ selects which credential slot is
 *	       filled, and writes are marked B_WRITEINPROG
 *	cred - credentials duplicated into the buffer when it has none
 *
 * Returns 0 once the buffer is on nmp->nm_bufq, EIO when no daemon is
 * available (the caller then has to deal with the I/O itself), or EINTR
 * when the wait for queue space was interrupted.
 */
int
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	struct nfsmount *nmp;
	int slot;
	int have_iod;
	int slpflag = 0;
	int slptimeo = 0;
	int error;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);
again:
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;

	/* Claim the first daemon that is waiting for work, if any. */
	have_iod = FALSE;
	for (slot = 0; slot < NFS_MAXASYNCDAEMON; slot++) {
		if (!nfs_iodwant[slot])
			continue;
		NFS_DPF(ASYNCIO,
			("nfs_asyncio: waking iod %d for mount %p\n",
			 slot, nmp));
		nfs_iodwant[slot] = (struct proc *)0;
		nfs_iodmount[slot] = nmp;
		nmp->nm_bufqiods++;
		wakeup((caddr_t)&nfs_iodwant[slot]);
		have_iod = TRUE;
		break;
	}

	/* No idle daemon: it is still fine if some already serve this mount. */
	if (!have_iod && nmp->nm_bufqiods > 0) {
		NFS_DPF(ASYNCIO,
			("nfs_asyncio: %d iods are already processing mount %p\n",
			 nmp->nm_bufqiods, nmp));
		have_iod = TRUE;
	}

	if (!have_iod) {
		NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
		return (EIO);
	}

	/* Wait while the mount's queue holds 2 * nfs_numasync or more buffers. */
	while (nmp->nm_bufqlen >= 2*nfs_numasync) {
		NFS_DPF(ASYNCIO,
			("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
		nmp->nm_bufqwant = TRUE;
		error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
			       "nfsaio", slptimeo);
		if (error) {
			if (nfs_sigintr(nmp, NULL, bp->b_proc))
				return (EINTR);
			/* After one caught wakeup, switch to timed sleeps. */
			if (slpflag == PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			}
		}
		/* Every daemon left this mount while we slept: start over. */
		if (nmp->nm_bufqiods == 0) {
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
			goto again;
		}
	}

	/* Make sure the buffer carries credentials for its direction. */
	if (!ISSET(bp->b_flags, B_READ)) {
		SET(bp->b_flags, B_WRITEINPROG);
		if (bp->b_wcred == NOCRED && cred != NOCRED) {
			bp->b_wcred = crdup(cred);
		}
	} else {
		if (bp->b_rcred == NOCRED && cred != NOCRED) {
			bp->b_rcred = crdup(cred);
		}
	}

	TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
	nmp->nm_bufqlen++;
	return (0);
}
/*
 * nfs_doio: carry out the I/O described by a buffer using NFS RPCs.
 *
 *	bp - the buffer; B_PHYS and B_READ in b_flags select the path taken
 *	cr - credentials handed to the read/write/readdir/readlink RPCs
 *	p  - process stored in the uio; also the target of the "text file
 *	     modified" kill in the VREG read path
 *
 * Three paths: B_PHYS buffers (whole-buffer read or write RPC), cached
 * reads (dispatch on vnode type), and cached writes (write the dirty
 * region only).  biodone(bp) is called before returning on every path.
 *
 * Returns the RPC error (0 on success).  In the write path, EINTR and
 * successful unstable writes leave the buffer marked B_DELWRI instead of
 * setting B_ERROR.
 */
int
nfs_doio(bp, cr, p)
register struct buf *bp;
struct ucred *cr;
struct proc *p;
{
register struct uio *uiop;
register struct vnode *vp;
struct nfsnode *np;
struct nfsmount *nmp;
int error = 0, diff, len, iomode, must_commit = 0;
struct uio uio;
struct iovec io;
vp = bp->b_vp;
np = VTONFS(vp);
nmp = VFSTONFS(vp->v_mount);
/* Build a single-iovec, kernel-space uio on the stack for the RPCs. */
uiop = &uio;
uiop->uio_iov = &io;
uiop->uio_iovcnt = 1;
uiop->uio_segflg = UIO_SYSSPACE;
uiop->uio_procp = p;
/* B_DONE on entry is only legal for async buffers; clear it before the I/O. */
if (ISSET(bp->b_flags, B_DONE)) {
if (!ISSET(bp->b_flags, B_ASYNC))
panic("nfs_doio: done and not async");
CLR(bp->b_flags, B_DONE);
}
FSDBG_TOP(256, np->n_size, bp->b_blkno * DEV_BSIZE, bp->b_bcount,
bp->b_flags);
FSDBG(257, bp->b_validoff, bp->b_validend, bp->b_dirtyoff,
bp->b_dirtyend);
if (ISSET(bp->b_flags, B_PHYS)) {
/* B_PHYS buffer: transfer the whole b_bcount at the block's byte offset. */
io.iov_len = uiop->uio_resid = bp->b_bcount;
io.iov_base = bp->b_data;
uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE;
if (ISSET(bp->b_flags, B_READ)) {
uiop->uio_rw = UIO_READ;
nfsstats.read_physios++;
error = nfs_readrpc(vp, uiop, cr);
} else {
/* "com" receives the must-commit indication, which is not examined on this path. */
int com;
iomode = NFSV3WRITE_DATASYNC;
uiop->uio_rw = UIO_WRITE;
nfsstats.write_physios++;
error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
}
if (error) {
SET(bp->b_flags, B_ERROR);
bp->b_error = error;
}
} else if (ISSET(bp->b_flags, B_READ)) {
/* Cached read: fill the whole buffer, dispatching on the vnode type. */
io.iov_len = uiop->uio_resid = bp->b_bcount;
io.iov_base = bp->b_data;
uiop->uio_rw = UIO_READ;
switch (vp->v_type) {
case VREG:
uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE;
nfsstats.read_bios++;
error = nfs_readrpc(vp, uiop, cr);
FSDBG(262, np->n_size, bp->b_blkno * DEV_BSIZE,
uiop->uio_resid, error);
if (!error) {
bp->b_validoff = 0;
if (uiop->uio_resid) {
/*
 * Short read.  diff is the number of bytes received; len is how far
 * the recorded file size extends beyond them.  If the file extends
 * further, zero-fill up to min(len, resid) and count those bytes as
 * valid; otherwise only the received bytes are valid.
 */
diff = bp->b_bcount - uiop->uio_resid;
len = np->n_size - ((u_quad_t)bp->b_blkno * DEV_BSIZE +
diff);
if (len > 0) {
len = min(len, uiop->uio_resid);
bzero((char *)bp->b_data + diff, len);
bp->b_validend = diff + len;
FSDBG(258, diff, len, 0, 1);
} else
bp->b_validend = diff;
} else
bp->b_validend = bp->b_bcount;
/* Zero whatever lies between the valid end and the end of the buffer. */
if (bp->b_validend < bp->b_bufsize) {
bzero((caddr_t)(bp->b_data + bp->b_validend),
bp->b_bufsize - bp->b_validend);
FSDBG(258, bp->b_validend,
bp->b_bufsize - bp->b_validend, 0, 2);
}
}
/*
 * The vnode is marked VTEXT and appears to have changed on the server
 * (lease revision mismatch under NQNFS, or mtime mismatch otherwise):
 * kill the process.
 */
if (p && (vp->v_flag & VTEXT) &&
(((nmp->nm_flag & NFSMNT_NQNFS) &&
NQNFS_CKINVALID(vp, np, ND_READ) &&
np->n_lrev != np->n_brev) ||
(!(nmp->nm_flag & NFSMNT_NQNFS) &&
np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
uprintf("Process killed due to text file modification\n");
psignal(p, SIGKILL);
p->p_flag |= P_NOSWAP;
}
break;
case VLNK:
uiop->uio_offset = (off_t)0;
nfsstats.readlink_bios++;
error = nfs_readlinkrpc(vp, uiop, cr);
break;
case VDIR:
nfsstats.readdir_bios++;
/* Directory blocks are addressed by logical block number in NFS_DIRBLKSIZ units. */
uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
/* NFSMNT_RDIRPLUS is cleared on mounts without NFSMNT_NFSV3. */
if (!(nmp->nm_flag & NFSMNT_NFSV3))
nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
error = nfs_readdirplusrpc(vp, uiop, cr);
/* Server lacks support: clear the flag and fall back to plain readdir below. */
if (error == NFSERR_NOTSUPP)
nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
}
if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
error = nfs_readdirrpc(vp, uiop, cr);
break;
default:
printf("nfs_doio: type %x unexpected\n", vp->v_type);
break;
};
if (error) {
SET(bp->b_flags, B_ERROR);
bp->b_error = error;
}
} else {
/* Cached write. */
/* If the file was ever mapped (UI_WASMAPPED), widen the dirty region to cover the whole valid region. */
if (UBCINFOEXISTS(vp) && ubc_issetflags(vp, UI_WASMAPPED)) {
bp->b_dirtyoff = min(bp->b_dirtyoff, bp->b_validoff);
bp->b_dirtyend = max(bp->b_dirtyend, bp->b_validend);
}
/* Never write past the recorded file size. */
if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
if (bp->b_dirtyend > bp->b_dirtyoff) {
/* Write only the dirty byte range of the buffer. */
io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff;
uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE +
bp->b_dirtyoff;
io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
uiop->uio_rw = UIO_WRITE;
nfsstats.write_bios++;
/* Unstable write only for async buffers that neither need a commit nor are B_NOCACHE. */
if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) ==
B_ASYNC)
iomode = NFSV3WRITE_UNSTABLE;
else
iomode = NFSV3WRITE_FILESYNC;
SET(bp->b_flags, B_WRITEINPROG);
error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
/* iomode is re-read after the RPC: B_NEEDCOMMIT is set only if it is still UNSTABLE. */
if (!error && iomode == NFSV3WRITE_UNSTABLE)
SET(bp->b_flags, B_NEEDCOMMIT);
else
CLR(bp->b_flags, B_NEEDCOMMIT);
CLR(bp->b_flags, B_WRITEINPROG);
/*
 * Interrupted write, or an unstable write that still needs a commit:
 * keep the data by leaving the buffer as a delayed write.  Async
 * buffers are reassigned to the vnode; others are tagged B_EINTR.
 */
if (error == EINTR || (!error && bp->b_flags & B_NEEDCOMMIT)) {
int s;
CLR(bp->b_flags, B_INVAL | B_NOCACHE);
if (!ISSET(bp->b_flags, B_DELWRI)) {
SET(bp->b_flags, B_DELWRI);
nbdwrite++;
}
FSDBG(261, bp->b_validoff, bp->b_validend,
bp->b_bufsize, bp->b_bcount);
if (ISSET(bp->b_flags, B_ASYNC)) {
s = splbio();
reassignbuf(bp, vp);
splx(s);
} else {
SET(bp->b_flags, B_EINTR);
}
} else {
/* Write finished (or failed hard): record any error on buffer and node. */
if (error) {
SET(bp->b_flags, B_ERROR);
bp->b_error = np->n_error = error;
np->n_flag |= NWRITEERR;
}
bp->b_dirtyoff = bp->b_dirtyend = 0;
/*
 * A buffer whose valid region does not start at 0 is invalidated.  One
 * whose valid region stops short of the buffer end is kept only if that
 * end coincides with EOF, in which case the tail is zeroed.
 */
if (bp->b_validoff)
SET(bp->b_flags, B_INVAL);
else
if (bp->b_validend < bp->b_bufsize) {
if ((off_t)bp->b_blkno * DEV_BSIZE +
bp->b_validend == np->n_size) {
bzero((caddr_t)(bp->b_data +
bp->b_validend),
bp->b_bufsize - bp->b_validend);
FSDBG(259, bp->b_validend,
bp->b_bufsize - bp->b_validend, 0,
0);
} else
SET(bp->b_flags, B_INVAL);
}
}
} else {
/*
 * Nothing dirty to write.  Apply the same "partially valid" test as
 * above (without zeroing), finish the buffer and return success.
 */
if (bp->b_validoff ||
(bp->b_validend < bp->b_bufsize &&
(off_t)bp->b_blkno * DEV_BSIZE + bp->b_validend !=
np->n_size)) {
SET(bp->b_flags, B_INVAL);
}
if (bp->b_flags & B_INVAL) {
FSDBG(260, bp->b_validoff, bp->b_validend,
bp->b_bufsize, bp->b_bcount);
}
bp->b_resid = 0;
biodone(bp);
FSDBG_BOT(256, bp->b_validoff, bp->b_validend, bp->b_bufsize,
np->n_size);
return (0);
}
}
/* Common exit: record the residual count, honour must_commit, complete the buffer. */
bp->b_resid = uiop->uio_resid;
if (must_commit)
nfs_clearcommit(vp->v_mount);
if (bp->b_flags & B_INVAL) {
FSDBG(260, bp->b_validoff, bp->b_validend, bp->b_bufsize,
bp->b_bcount);
}
FSDBG_BOT(256, bp->b_validoff, bp->b_validend, bp->b_bcount, error);
biodone(bp);
return (error);
}