#include "db_config.h"
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/mp.h"
#include "dbinc/log.h"
#include "dbinc/txn.h"
/*
 * Forward declaration: file-local helper that writes a single buffer's
 * page.  It is defined later in this file and called by __memp_bhwrite.
 */
static int __memp_pgwrite
__P((DB_ENV *, DB_MPOOLFILE *, DB_MPOOL_HASH *, BH *));
int
__memp_bhwrite(dbmp, hp, mfp, bhp, open_extents)
DB_MPOOL *dbmp;
DB_MPOOL_HASH *hp;
MPOOLFILE *mfp;
BH *bhp;
int open_extents;
{
DB_ENV *dbenv;
DB_MPOOLFILE *dbmfp;
DB_MPREG *mpreg;
int ret;
dbenv = dbmp->dbenv;
if (mfp->deadfile)
return (__memp_pgwrite(dbenv, NULL, hp, bhp));
MUTEX_LOCK(dbenv, dbmp->mutex);
TAILQ_FOREACH(dbmfp, &dbmp->dbmfq, q)
if (dbmfp->mfp == mfp && !F_ISSET(dbmfp, MP_READONLY)) {
++dbmfp->ref;
break;
}
MUTEX_UNLOCK(dbenv, dbmp->mutex);
if (dbmfp != NULL) {
if (dbmfp->fhp == NULL) {
if (mfp->no_backing_file) {
--dbmfp->ref;
return (EPERM);
}
MUTEX_LOCK(dbenv, dbmp->mutex);
if (dbmfp->fhp == NULL)
ret = __db_appname(dbenv, DB_APP_TMP, NULL,
F_ISSET(dbenv, DB_ENV_DIRECT_DB) ?
DB_OSO_DIRECT : 0, &dbmfp->fhp, NULL);
else
ret = 0;
MUTEX_UNLOCK(dbenv, dbmp->mutex);
if (ret != 0) {
__db_errx(dbenv,
"unable to create temporary backing file");
--dbmfp->ref;
return (ret);
}
}
goto pgwrite;
}
if (!open_extents && F_ISSET(mfp, MP_EXTENT))
return (EPERM);
if (F_ISSET(mfp, MP_TEMP) || mfp->no_backing_file)
return (EPERM);
if (mfp->ftype != 0 && mfp->ftype != DB_FTYPE_SET) {
MUTEX_LOCK(dbenv, dbmp->mutex);
LIST_FOREACH(mpreg, &dbmp->dbregq, q)
if (mpreg->ftype == mfp->ftype)
break;
MUTEX_UNLOCK(dbenv, dbmp->mutex);
if (mpreg == NULL)
return (EPERM);
}
if ((ret = __memp_fcreate(dbenv, &dbmfp)) != 0)
return (ret);
if ((ret = __memp_fopen(dbmfp,
mfp, NULL, DB_DURABLE_UNKNOWN, 0, mfp->stat.st_pagesize)) != 0) {
(void)__memp_fclose(dbmfp, 0);
if (!mfp->deadfile)
return (ret);
dbmfp = NULL;
}
pgwrite:
MVCC_MPROTECT(bhp->buf, mfp->stat.st_pagesize,
PROT_READ | PROT_WRITE | PROT_EXEC);
ret = __memp_pgwrite(dbenv, dbmfp, hp, bhp);
if (dbmfp == NULL)
return (ret);
MUTEX_LOCK(dbenv, dbmp->mutex);
if (dbmfp->ref == 1)
F_SET(dbmfp, MP_FLUSH);
else
--dbmfp->ref;
MUTEX_UNLOCK(dbenv, dbmp->mutex);
return (ret);
}
/*
 * __memp_pgread --
 *	Fill a buffer with its page from the underlying file, or with a
 *	freshly cleared page when the file does not hold a full page.
 *
 * Parameters:
 *	dbmfp		per-process file handle (fhp may be NULL)
 *	hp		hash bucket; its mtx_hash is released for the duration
 *			of the read and re-acquired before returning
 *	bhp		buffer header to fill
 *	can_create	non-zero if a short/missing page may be created
 *
 * Returns 0 on success, DB_PAGE_NOTFOUND for a short read when creation
 * is not allowed, or the error from __os_io / __memp_pg.
 */
int
__memp_pgread(dbmfp, hp, bhp, can_create)
	DB_MPOOLFILE *dbmfp;
	DB_MPOOL_HASH *hp;
	BH *bhp;
	int can_create;
{
	DB_ENV *dbenv;
	MPOOLFILE *mfp;
	size_t clear_bytes, bytes_read;
	u_int32_t pgsize;
	int ret;

	dbenv = dbmfp->dbenv;
	mfp = dbmfp->mfp;
	pgsize = mfp->stat.st_pagesize;

	DB_ASSERT(dbenv, !F_ISSET(bhp, BH_DIRTY_CREATE | BH_LOCKED));
	DB_ASSERT(dbenv, can_create || !F_ISSET(bhp, BH_DIRTY));

	/* Mark the buffer busy and invalid, then drop the bucket mutex. */
	F_SET(bhp, BH_LOCKED | BH_TRASH);
	MUTEX_UNLOCK(dbenv, hp->mtx_hash);

	/* With no open file there is nothing to read: treat as zero bytes. */
	bytes_read = 0;
	if (dbmfp->fhp != NULL) {
		ret = __os_io(dbenv, DB_IO_READ, dbmfp->fhp,
		    bhp->pgno, pgsize, 0, pgsize, bhp->buf, &bytes_read);
		if (ret != 0)
			goto done;
	}

	if (bytes_read < pgsize) {
		/* A short read is only acceptable if we may create the page. */
		if (!can_create) {
			ret = DB_PAGE_NOTFOUND;
			goto done;
		}

		/* Zero the configured prefix, or the whole page if unset. */
		clear_bytes = mfp->clear_len == DB_CLEARLEN_NOTSET ?
		    pgsize : mfp->clear_len;
		memset(bhp->buf, 0, clear_bytes);
#if defined(DIAGNOSTIC) || defined(UMRW)
		/* Fill the remainder of the page with CLEAR_BYTE. */
		if (clear_bytes < pgsize)
			memset(bhp->buf + clear_bytes,
			    CLEAR_BYTE, pgsize - clear_bytes);
#endif
	}

#ifdef HAVE_STATISTICS
	/* Count the page as created or read in. */
	if (bytes_read < pgsize)
		++mfp->stat.st_page_create;
	else
		++mfp->stat.st_page_in;
#endif

	/* Files with a non-zero type get their page-in processing. */
	if (mfp->ftype == 0)
		ret = 0;
	else
		ret = __memp_pg(dbmfp, bhp, 1);

done:
	MUTEX_LOCK(dbenv, hp->mtx_hash);

	/* Always unlock the buffer; it is only valid if nothing failed. */
	F_CLR(bhp, BH_LOCKED);
	if (ret == 0)
		F_CLR(bhp, BH_TRASH);

	/* Release the I/O mutex if the bucket is flagged as having a waiter. */
	if (F_ISSET(hp, IO_WAITER)) {
		F_CLR(hp, IO_WAITER);
		MUTEX_UNLOCK(dbenv, hp->mtx_io);
	}

	return (ret);
}
/*
 * __memp_pgwrite --
 *	Write one buffer's page to its file and update the buffer state.
 *
 * Parameters:
 *	dbenv	environment handle
 *	dbmfp	per-process file handle, or NULL when there is no file to
 *		write to (the I/O is then skipped)
 *	hp	hash bucket; its mtx_hash is released during the write and
 *		re-acquired before returning
 *	bhp	dirty buffer header to write
 *
 * Returns 0 on success (including the skipped-I/O case), otherwise the
 * error from __log_flush, __memp_pg or __os_io.
 */
static int
__memp_pgwrite(dbenv, dbmfp, hp, bhp)
	DB_ENV *dbenv;
	DB_MPOOLFILE *dbmfp;
	DB_MPOOL_HASH *hp;
	BH *bhp;
{
	DB_LSN page_lsn;
	MPOOLFILE *mfp;
	size_t nwritten;
	int need_pgin, ret;

	ret = 0;
	need_pgin = 0;
	mfp = NULL;
	if (dbmfp != NULL)
		mfp = dbmfp->mfp;

	DB_ASSERT(dbenv, F_ISSET(bhp, BH_DIRTY));
	DB_ASSERT(dbenv, !F_ISSET(bhp, BH_TRASH));

	/* Mark the buffer busy, then drop the bucket mutex for the I/O. */
	F_SET(bhp, BH_LOCKED);
	MUTEX_UNLOCK(dbenv, hp->mtx_hash);

	/* No handle, or a dead file: nothing to write, just clean up. */
	if (mfp == NULL || mfp->deadfile)
		goto finish;

	/*
	 * If the page carries an LSN, flush the log up to that LSN before
	 * the page itself is written.
	 */
	if (LOGGING_ON(dbenv) && mfp->lsn_off != DB_LSN_OFF_NOTSET &&
	    !IS_CLIENT_PGRECOVER(dbenv)) {
		memcpy(&page_lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN));
		if (!IS_NOT_LOGGED_LSN(page_lsn)) {
			ret = __log_flush(dbenv, &page_lsn);
			if (ret != 0)
				goto finish;
		}
	}

#ifdef DIAGNOSTIC
	/*
	 * Check that the log's s_lsn is past the page's LSN.  The comparison
	 * is first made without the flush mutex and, only if it fails,
	 * asserted again with the mutex held.
	 */
	if (LOGGING_ON(dbenv) && !IS_NOT_LOGGED_LSN(LSN(bhp->buf)) &&
	    !IS_CLIENT_PGRECOVER(dbenv)) {
		DB_LOG *dblp;
		LOG *lp;

		dblp = dbenv->lg_handle;
		lp = dblp->reginfo.primary;
		if (!lp->db_log_inmemory &&
		    LOG_COMPARE(&lp->s_lsn, &LSN(bhp->buf)) <= 0) {
			MUTEX_LOCK(dbenv, lp->mtx_flush);
			DB_ASSERT(dbenv,
			    LOG_COMPARE(&lp->s_lsn, &LSN(bhp->buf)) > 0);
			MUTEX_UNLOCK(dbenv, lp->mtx_flush);
		}
	}
#endif

	/*
	 * Run page-out processing unless the buffer is already flagged as
	 * needing page-in.  Remember that we did, so the flag can be set
	 * once the bucket mutex is held again.
	 */
	if (mfp->ftype != 0 && !F_ISSET(bhp, BH_CALLPGIN)) {
		need_pgin = 1;
		ret = __memp_pg(dbmfp, bhp, 0);
		if (ret != 0)
			goto finish;
	}

	/* Write the page. */
	ret = __os_io(dbenv, DB_IO_WRITE, dbmfp->fhp, bhp->pgno,
	    mfp->stat.st_pagesize, 0, mfp->stat.st_pagesize,
	    bhp->buf, &nwritten);
	if (ret != 0) {
		__db_errx(dbenv, "%s: write failed for page %lu",
		    __memp_fn(dbmfp), (u_long)bhp->pgno);
		goto finish;
	}
	STAT(++mfp->stat.st_page_out);

	/*
	 * Advance the file's highest flushed page number; the unlocked test
	 * is repeated under the file mutex before storing.
	 */
	if (bhp->pgno > mfp->last_flushed_pgno) {
		MUTEX_LOCK(dbenv, mfp->mutex);
		if (bhp->pgno > mfp->last_flushed_pgno)
			mfp->last_flushed_pgno = bhp->pgno;
		MUTEX_UNLOCK(dbenv, mfp->mutex);
	}

finish:
	/* From here on dbmfp and mfp may be NULL and must not be used. */
	MUTEX_LOCK(dbenv, hp->mtx_hash);

	if (need_pgin)
		F_SET(bhp, BH_CALLPGIN);

	/* On success the buffer is clean: fix the bucket's dirty count. */
	if (ret == 0) {
		DB_ASSERT(dbenv, hp->hash_page_dirty != 0);
		--hp->hash_page_dirty;
		F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
	}

	/* Success or not, reset the sync count and unlock the buffer. */
	bhp->ref_sync = 0;
	F_CLR(bhp, BH_LOCKED);

	/* Release the I/O mutex if the bucket is flagged as having a waiter. */
	if (F_ISSET(hp, IO_WAITER)) {
		F_CLR(hp, IO_WAITER);
		MUTEX_UNLOCK(dbenv, hp->mtx_io);
	}

	return (ret);
}
/*
 * __memp_pg --
 *	Invoke the registered page-in or page-out callback for a buffer.
 *
 * Parameters:
 *	dbmfp	per-process file handle for the buffer's file
 *	bhp	buffer header whose page is passed to the callback
 *	is_pgin	non-zero to call the pgin callback, zero for pgout
 *
 * Returns 0 if no registration or callback exists or the callback
 * succeeds; otherwise the callback's non-zero return value, after an
 * error message has been issued.
 */
int
__memp_pg(dbmfp, bhp, is_pgin)
	DB_MPOOLFILE *dbmfp;
	BH *bhp;
	int is_pgin;
{
	DBT cookie, *cookiep;
	DB_ENV *dbenv;
	DB_MPOOL *dbmp;
	DB_MPREG *mpreg;
	MPOOLFILE *mfp;
	int ftype, ret;

	dbenv = dbmfp->dbenv;
	dbmp = dbenv->mp_handle;
	mfp = dbmfp->mfp;

	/*
	 * DB_FTYPE_SET uses the pool's pg_inout entry directly; any other
	 * type is looked up in the registration list under the pool mutex.
	 */
	ftype = mfp->ftype;
	if (ftype == DB_FTYPE_SET)
		mpreg = dbmp->pg_inout;
	else {
		MUTEX_LOCK(dbenv, dbmp->mutex);
		LIST_FOREACH(mpreg, &dbmp->dbregq, q)
			if (ftype == mpreg->ftype)
				break;
		MUTEX_UNLOCK(dbenv, dbmp->mutex);
	}

	/* Nothing registered for this file type: nothing to do. */
	if (mpreg == NULL)
		return (0);

	/* Hand the callback the file's cookie, if it has one. */
	cookiep = NULL;
	if (mfp->pgcookie_len != 0) {
		DB_SET_DBT(cookie, R_ADDR(
		    dbmp->reginfo, mfp->pgcookie_off), mfp->pgcookie_len);
		cookiep = &cookie;
	}

	/* A missing callback counts as success. */
	ret = 0;
	if (is_pgin) {
		if (mpreg->pgin != NULL)
			ret = mpreg->pgin(
			    dbenv, bhp->pgno, bhp->buf, cookiep);
	} else {
		if (mpreg->pgout != NULL)
			ret = mpreg->pgout(
			    dbenv, bhp->pgno, bhp->buf, cookiep);
	}

	if (ret != 0)
		__db_errx(dbenv, "%s: %s failed for page %lu",
		    __memp_fn(dbmfp), is_pgin ? "pgin" : "pgout",
		    (u_long)bhp->pgno);

	return (ret);
}
/*
 * __memp_bhfree --
 *	Detach a buffer header from its hash bucket / version chain and,
 *	depending on flags, release its memory and its file's block count.
 *
 * Parameters:
 *	dbmp	per-process memory pool handle
 *	infop	region information; used to lock the region and reach the
 *		MPOOL statistics when BH_FREE_FREEMEM is set
 *	hp	hash bucket the buffer is linked into
 *	bhp	buffer header to remove (asserted: ref == 0, not BH_FROZEN)
 *	flags	any of BH_FREE_UNLOCKED, BH_FREE_REUSE, BH_FREE_FREEMEM
 *
 * Lock handling visible in this function: hp->mtx_hash is released here
 * unless BH_FREE_UNLOCKED or BH_FREE_REUSE is set, so the caller is
 * presumably expected to hold it on entry in the other cases.
 *
 * Returns 0, or the first error from __txn_remove_buffer or
 * __memp_mf_discard (see the review note on the BH_FREE_REUSE path).
 */
int
__memp_bhfree(dbmp, infop, hp, bhp, flags)
DB_MPOOL *dbmp;
REGINFO *infop;
DB_MPOOL_HASH *hp;
BH *bhp;
u_int32_t flags;
{
DB_ENV *dbenv;
#ifdef DIAGNOSTIC
DB_LSN vlsn;
#endif
MPOOL *c_mp;
MPOOLFILE *mfp;
BH *next_bhp, *prev_bhp;
int reorder, ret, t_ret;
#ifdef DIAG_MVCC
size_t pagesize;
#endif
ret = 0;
dbenv = dbmp->dbenv;
/* Locate the buffer's file description through its region offset. */
mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
#ifdef DIAG_MVCC
pagesize = mfp->stat.st_pagesize;
#endif
/* The buffer must be unreferenced and not frozen. */
DB_ASSERT(dbenv, bhp->ref == 0 && !F_ISSET(bhp, BH_FROZEN));
/*
 * Unless BH_FREE_UNLOCKED is set, the buffer must satisfy one of the
 * version-chain conditions tested below.
 */
DB_ASSERT(dbenv, LF_ISSET(BH_FREE_UNLOCKED) ||
SH_CHAIN_SINGLETON(bhp, vc) ||
(SH_CHAIN_HASNEXT(bhp, vc) &&
SH_CHAIN_NEXTP(bhp, vc, __bh)->td_off == bhp->td_off) ||
(SH_CHAIN_HASPREV(bhp, vc) ?
IS_MAX_LSN(*VISIBLE_LSN(dbenv, bhp)) :
BH_OBSOLETE(bhp, hp->old_reader, vlsn)));
/*
 * Decide, before the buffer is unlinked, whether the bucket must be
 * re-prioritized afterwards.
 */
reorder = (__memp_bh_priority(bhp) == bhp->priority);
prev_bhp = SH_CHAIN_PREV(bhp, vc, __bh);
/*
 * With no next entry in the version chain, bhp is unlinked from the
 * hash bucket queue; if it has a previous version, that one is first
 * inserted into the queue next to it.
 */
if ((next_bhp = SH_CHAIN_NEXT(bhp, vc, __bh)) == NULL) {
if (prev_bhp != NULL)
SH_TAILQ_INSERT_AFTER(&hp->hash_bucket,
bhp, prev_bhp, hq, __bh);
SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
next_bhp = prev_bhp;
}
SH_CHAIN_REMOVE(bhp, vc, __bh);
/*
 * Re-prioritize around the surviving neighbour, or, when there is none,
 * recompute the bucket priority from the queue head (0 if empty).
 */
if (reorder) {
if (next_bhp != NULL)
__memp_bucket_reorder(dbenv, hp, next_bhp);
else
hp->hash_priority = SH_TAILQ_EMPTY(&hp->hash_bucket) ?
0 : BH_PRIORITY(
SH_TAILQ_FIRSTP(&hp->hash_bucket, __bh));
}
#ifdef DIAGNOSTIC
__memp_check_order(dbenv, hp);
#endif
/*
 * Detach the buffer from its owning transaction, if it has one.  This
 * is skipped when BH_FREE_UNLOCKED is set.
 */
if (bhp->td_off != INVALID_ROFF && !LF_ISSET(BH_FREE_UNLOCKED)) {
ret = __txn_remove_buffer(
dbenv, BH_OWNER(dbenv, bhp), hp->mtx_hash);
bhp->td_off = INVALID_ROFF;
}
/*
 * NOTE(review): pagesize is declared only under DIAG_MVCC, so this
 * presumably relies on MVCC_MPROTECT discarding its arguments in other
 * builds -- confirm against the macro definition.
 */
MVCC_MPROTECT(bhp->buf, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC);
/*
 * Reuse: the caller keeps the buffer memory, so stop here.  The hash
 * mutex is not released on this path.
 * NOTE(review): this returns 0 even if __txn_remove_buffer above set
 * ret to an error -- confirm whether discarding that error is intended.
 */
if (LF_ISSET(BH_FREE_REUSE))
return (0);
/* Drop the bucket mutex before taking the region and file mutexes. */
if (!LF_ISSET(BH_FREE_UNLOCKED))
MUTEX_UNLOCK(dbenv, hp->mtx_hash);
/* Return the buffer memory to the region and adjust the page count. */
if (LF_ISSET(BH_FREE_FREEMEM)) {
MPOOL_REGION_LOCK(dbenv, infop);
__memp_free(infop, mfp, bhp);
c_mp = infop->primary;
c_mp->stat.st_pages--;
MPOOL_REGION_UNLOCK(dbenv, infop);
}
/*
 * One fewer block belongs to the file.  When no blocks and no handles
 * remain, discard the file description; the file mutex is not unlocked
 * here on that branch, so presumably __memp_mf_discard releases it --
 * confirm.  The first error seen is the one returned.
 */
MUTEX_LOCK(dbenv, mfp->mutex);
if (--mfp->block_cnt == 0 && mfp->mpf_cnt == 0) {
if ((t_ret = __memp_mf_discard(dbmp, mfp)) != 0 && ret == 0)
ret = t_ret;
} else
MUTEX_UNLOCK(dbenv, mfp->mutex);
return (ret);
}