#include "db_config.h"
#ifndef lint
static const char revid[] = "$Id: os_rw.c,v 1.2 2004/03/30 01:23:48 jtownsen Exp $";
#endif
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <string.h>
#include <unistd.h>
#endif
#include "db_int.h"
#ifdef HAVE_FILESYSTEM_NOTZERO
static int __os_zerofill __P((DB_ENV *, DB_FH *));
#endif
static int __os_physwrite __P((DB_ENV *, DB_FH *, void *, size_t, size_t *));
/*
 * __os_io --
 *	Read or write one page-sized buffer at page number pgno.
 *
 * Parameters:
 *	dbenv		environment handle, used for mutex and error calls
 *	op		DB_IO_READ or DB_IO_WRITE
 *	fhp		file handle to operate on
 *	pgno, pagesize	the transfer starts at byte offset pgno * pagesize
 *	buf		buffer of at least pagesize bytes
 *	niop		out: number of bytes transferred
 *
 * Returns 0 on success, otherwise the error returned by __os_seek,
 * __os_read or __os_write.
 *
 * When __os_is_winnt() is true, a positioned ReadFile/WriteFile (offset
 * supplied through an OVERLAPPED structure) is tried first; it needs
 * neither a seek nor the handle mutex.  Anything that prevents or
 * spoils that attempt falls through to the "slow" path, which seeks and
 * then calls __os_read/__os_write while holding fhp->mutexp.
 */
int
__os_io(dbenv, op, fhp, pgno, pagesize, buf, niop)
	DB_ENV *dbenv;
	int op;
	DB_FH *fhp;
	db_pgno_t pgno;
	size_t pagesize, *niop;
	u_int8_t *buf;
{
	int ret;

	if (__os_is_winnt()) {
		ULONG64 off = (ULONG64)pagesize * pgno;
		OVERLAPPED over;
		DWORD nbytes;

		/*
		 * Zero the whole structure first: members that are not
		 * set explicitly (Internal, InternalHigh) must not hold
		 * stack garbage when the structure is handed to
		 * ReadFile/WriteFile.
		 */
		memset(&over, 0, sizeof(over));
		over.Offset = (DWORD)(off & 0xffffffff);
		over.OffsetHigh = (DWORD)(off >> 32);
		over.hEvent = 0;

		/*
		 * Start from zero so that an op matching neither case
		 * below never compares an indeterminate value against
		 * pagesize.
		 */
		nbytes = 0;

		switch (op) {
		case DB_IO_READ:
			/* An installed j_read hook must see the read. */
			if (DB_GLOBAL(j_read) != NULL)
				goto slow;
			if (!ReadFile(fhp->handle,
			    buf, (DWORD)pagesize, &nbytes, &over))
				goto slow;
			break;
		case DB_IO_WRITE:
			/* An installed j_write hook must see the write. */
			if (DB_GLOBAL(j_write) != NULL)
				goto slow;
#ifdef HAVE_FILESYSTEM_NOTZERO
			/*
			 * __os_write runs __os_zerofill before writing
			 * when __os_fs_notzero() is true, so route the
			 * write through it.
			 */
			if (__os_fs_notzero())
				goto slow;
#endif
			if (!WriteFile(fhp->handle,
			    buf, (DWORD)pagesize, &nbytes, &over))
				goto slow;
			break;
		}

		/* A short transfer is redone in full on the slow path. */
		if (nbytes == pagesize) {
			*niop = (size_t)nbytes;
			return (0);
		}
	}

slow:	MUTEX_THREAD_LOCK(dbenv, fhp->mutexp);

	if ((ret = __os_seek(dbenv, fhp,
	    pagesize, pgno, 0, 0, DB_OS_SEEK_SET)) != 0)
		goto err;

	switch (op) {
	case DB_IO_READ:
		ret = __os_read(dbenv, fhp, buf, pagesize, niop);
		break;
	case DB_IO_WRITE:
		ret = __os_write(dbenv, fhp, buf, pagesize, niop);
		break;
	}

err:	MUTEX_THREAD_UNLOCK(dbenv, fhp->mutexp);

	return (ret);
}
/*
 * __os_read --
 *	Read up to len bytes from the handle's current position into addr.
 *
 * Parameters:
 *	dbenv	environment handle, used for error reporting
 *	fhp	file handle to read from
 *	addr	destination buffer of at least len bytes
 *	len	number of bytes requested
 *	nrp	out: number of bytes actually read
 *
 * Returns 0 on success (possibly with *nrp < len when a read transfers
 * zero bytes), otherwise the error number of the failed read.
 *
 * Partial reads are continued until len bytes have arrived.  A failed
 * read is retried while the error is EINTR or EBUSY and the retry
 * counter, which is shared across the whole call, stays below DB_RETRY.
 */
int
__os_read(dbenv, fhp, addr, len, nrp)
	DB_ENV *dbenv;
	DB_FH *fhp;
	void *addr;
	size_t len;
	size_t *nrp;
{
	size_t offset, nr;
	DWORD count;
	int ret, retries;
	BOOL success;
	u_int8_t *taddr;

	retries = 0;
	for (taddr = addr,
	    offset = 0; offset < len; taddr += nr, offset += nr) {
retry:		if (DB_GLOBAL(j_read) != NULL) {
			nr = DB_GLOBAL(j_read)(fhp->fd,
			    taddr, len - offset);
			/*
			 * nr is unsigned, so testing "nr >= 0" can never
			 * detect a failure.  A negative return from the
			 * hook converts to a value far larger than the
			 * request, and a successful read can never return
			 * more than was asked for, so compare against the
			 * request size instead.
			 */
			success = (nr <= len - offset);
		} else {
			success = ReadFile(fhp->handle,
			    taddr, (DWORD)(len - offset), &count, NULL);
			if (!success)
				__os_set_errno(__os_win32_errno());
			else
				nr = (size_t)count;
		}

		if (!success) {
			/*
			 * For the hook path this presumably relies on the
			 * hook having set errno itself.
			 */
			ret = __os_get_errno();
			if ((ret == EINTR || ret == EBUSY) &&
			    ++retries < DB_RETRY)
				goto retry;
			/*
			 * Cast the difference, not just len: otherwise the
			 * argument has type size_t and mismatches %lu where
			 * size_t is wider than unsigned long.
			 */
			__db_err(dbenv, "read: 0x%lx, %lu: %s",
			    P_TO_ULONG(taddr),
			    (u_long)(len - offset), strerror(ret));
			return (ret);
		}

		/* Nothing transferred: stop and report what we have. */
		if (nr == 0)
			break;
	}

	*nrp = taddr - (u_int8_t *)addr;
	return (0);
}
/*
 * __os_write --
 *	Write len bytes from addr at the handle's current position.
 *
 * When HAVE_FILESYSTEM_NOTZERO is defined and __os_fs_notzero() is
 * true, __os_zerofill is run first and its error, if any, is returned
 * without writing.  The write itself is done by __os_physwrite, whose
 * return value (0 on success) and *nwp result are passed straight
 * back to the caller.
 */
int
__os_write(dbenv, fhp, addr, len, nwp)
	DB_ENV *dbenv;
	DB_FH *fhp;
	void *addr;
	size_t len;
	size_t *nwp;
{
#ifdef HAVE_FILESYSTEM_NOTZERO
	int ret;

	if (__os_fs_notzero()) {
		ret = __os_zerofill(dbenv, fhp);
		if (ret != 0)
			return (ret);
	}
#endif

	return (__os_physwrite(dbenv, fhp, addr, len, nwp));
}
/*
 * __os_physwrite --
 *	Write len bytes from addr at the handle's current position.
 *
 * Parameters:
 *	dbenv	environment handle, used for error reporting
 *	fhp	file handle to write to
 *	addr	source buffer of at least len bytes
 *	len	number of bytes to write
 *	nwp	out: set to len on success
 *
 * Returns 0 once all len bytes have been written, otherwise the error
 * number of the failed write.
 *
 * Partial writes are continued until everything is out.  A failed
 * write is retried while the error is EINTR or EBUSY and the retry
 * counter, which is shared across the whole call, stays below DB_RETRY.
 */
static int
__os_physwrite(dbenv, fhp, addr, len, nwp)
	DB_ENV *dbenv;
	DB_FH *fhp;
	void *addr;
	size_t len;
	size_t *nwp;
{
	size_t offset, nw;
	DWORD count;
	int ret, retries;
	BOOL success;
	u_int8_t *taddr;

	retries = 0;
	for (taddr = addr,
	    offset = 0; offset < len; taddr += nw, offset += nw) {
retry:		if (DB_GLOBAL(j_write) != NULL) {
			nw = DB_GLOBAL(j_write)(fhp->fd,
			    taddr, len - offset);
			/*
			 * nw is unsigned, so testing "nw >= 0" can never
			 * detect a failure.  A negative return from the
			 * hook converts to a value far larger than the
			 * request, and a successful write can never report
			 * more than was asked for, so compare against the
			 * request size instead.
			 */
			success = (nw <= len - offset);
		} else {
			success = WriteFile(fhp->handle,
			    taddr, (DWORD)(len - offset), &count, NULL);
			if (!success)
				__os_set_errno(__os_win32_errno());
			else
				nw = (size_t)count;
		}

		if (!success) {
			/*
			 * For the hook path this presumably relies on the
			 * hook having set errno itself.
			 */
			ret = __os_get_errno();
			if ((ret == EINTR || ret == EBUSY) &&
			    ++retries < DB_RETRY)
				goto retry;
			/*
			 * Print the address the same way __os_read does:
			 * passing a raw pointer to %x is a type mismatch.
			 * Likewise cast the whole difference so that the
			 * argument really is an unsigned long for %lu.
			 */
			__db_err(dbenv, "write: 0x%lx, %lu: %s",
			    P_TO_ULONG(taddr),
			    (u_long)(len - offset), strerror(ret));
			return (ret);
		}
	}

	*nwp = len;
	return (0);
}
#ifdef HAVE_FILESYSTEM_NOTZERO
static int
__os_zerofill(dbenv, fhp)
DB_ENV *dbenv;
DB_FH *fhp;
{
unsigned __int64 stat_offset, write_offset;
size_t blen, nw;
u_int32_t bytes, mbytes;
int group_sync, need_free, ret;
u_int8_t buf[8 * 1024], *bp;
write_offset = (unsigned __int64)fhp->pgno * fhp->pgsize + fhp->offset;
if ((ret = __os_ioinfo(dbenv, NULL, fhp, &mbytes, &bytes, NULL)) != 0)
return (ret);
stat_offset = (unsigned __int64)mbytes * MEGABYTE + bytes;
if (stat_offset >= write_offset)
return (0);
#undef ZF_LARGE_WRITE
#define ZF_LARGE_WRITE (64 * 1024)
if (write_offset - stat_offset > ZF_LARGE_WRITE) {
if ((ret = __os_calloc(dbenv, 1, ZF_LARGE_WRITE, &bp)) != 0)
return (ret);
blen = ZF_LARGE_WRITE;
need_free = 1;
} else {
bp = buf;
blen = sizeof(buf);
need_free = 0;
memset(buf, 0, sizeof(buf));
}
if ((ret = __os_seek(
dbenv, fhp, MEGABYTE, mbytes, bytes, 0, DB_OS_SEEK_SET)) != 0)
goto err;
for (group_sync = 0; stat_offset < write_offset; group_sync = 1) {
if (write_offset - stat_offset <= blen) {
blen = (size_t)(write_offset - stat_offset);
if (group_sync && (ret = __os_fsync(dbenv, fhp)) != 0)
goto err;
}
if ((ret = __os_physwrite(dbenv, fhp, bp, blen, &nw)) != 0)
goto err;
stat_offset += blen;
}
if ((ret = __os_fsync(dbenv, fhp)) != 0)
goto err;
mbytes = (u_int32_t)(write_offset / MEGABYTE);
bytes = (u_int32_t)(write_offset % MEGABYTE);
ret = __os_seek(dbenv, fhp, MEGABYTE, mbytes, bytes, 0, DB_OS_SEEK_SET);
err: if (need_free)
__os_free(dbenv, bp);
return (ret);
}
#endif