#include "db_config.h"
#include "db_int.h"
#include "dbinc/mp.h"
#include "dbinc/txn.h"
/*
 * Forward declaration of the file-local helper defined later in this file;
 * it is called from __memp_alloc before its definition appears.  The __P
 * wrapper around the parameter list presumably exists for pre-ANSI
 * compilers -- confirm against the project's headers.
 */
static void __memp_bad_buffer __P((DB_ENV *, DB_MPOOL_HASH *));
/*
 * __memp_alloc --
 *	Allocate space from a cache region, evicting buffers from the cache's
 *	hash buckets when the region allocator cannot satisfy the request.
 *
 * dbmp:	buffer pool handle; supplies dbenv and the reginfo used to
 *		resolve an evicted buffer's MPOOLFILE.
 * infop:	cache region to allocate from; infop->primary is its MPOOL.
 * mfp:		if non-NULL, a page buffer is being allocated: the caller's
 *		len is ignored and recomputed from mfp->stat.st_pagesize, and
 *		an evicted buffer with the same page size may be handed back
 *		directly instead of going through the region allocator.
 * len:		number of bytes to allocate when mfp is NULL.
 * offsetp:	if non-NULL, receives the region offset of the allocation.
 * retp:	receives the address of the allocation (stored as a void *).
 *
 * Returns 0 on success.  On failure returns the __env_alloc error once the
 * search has given up (after logging a message), or the error returned by
 * __memp_bh_thaw or __memp_bhfree.
 *
 * The function acquires the region lock itself; on every return path
 * visible here the region lock has been released.
 */
int
__memp_alloc(dbmp, infop, mfp, len, offsetp, retp)
DB_MPOOL *dbmp;
REGINFO *infop;
MPOOLFILE *mfp;
size_t len;
roff_t *offsetp;
void *retp;
{
BH *bhp, *oldest_bhp, *tbhp;
BH_FROZEN_PAGE *frozen_bhp;
DB_ENV *dbenv;
DB_LSN vlsn;
DB_MPOOL_HASH *dbht, *hp, *hp_end, *hp_tmp;
MPOOL *c_mp;
MPOOLFILE *bh_mfp;
size_t freed_space;
u_int32_t buckets, buffers, high_priority, priority;
u_int32_t put_counter, total_buckets;
int aggressive, alloc_freeze, giveup, got_oldest, ret;
u_int8_t *endp;
void *p;
dbenv = dbmp->dbenv;
c_mp = infop->primary;
/* dbht is the start of the cache's hash table, hp_end is one past its end. */
dbht = R_ADDR(infop, c_mp->htab);
hp_end = &dbht[c_mp->htab_buckets];
buckets = buffers = put_counter = total_buckets = 0;
aggressive = alloc_freeze = giveup = got_oldest = 0;
hp_tmp = NULL;
/* Count the allocation request. */
STAT(c_mp->stat.st_alloc++);
/*
 * When allocating a page buffer, the caller's len is replaced by
 * SSZA(BH, buf) (presumably the offset of the buf member within BH --
 * confirm) plus the file's page size.  MVCC_BHSIZE may adjust len further;
 * its definition is not visible here.
 */
if (mfp != NULL) {
len = SSZA(BH, buf) + mfp->stat.st_pagesize;
MVCC_BHSIZE(mfp, len);
}
MPOOL_REGION_LOCK(dbenv, infop);
/*
 * Threshold used on non-aggressive passes: buckets whose (adjusted)
 * priority is above lru_count minus a tenth of the page count are skipped.
 */
high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;
/*
 * Try the region allocator first.  Every eviction path below comes back
 * to this label, with the region lock held, to retry.
 */
alloc: if ((ret = __env_alloc(infop, len, &p)) == 0) {
if (mfp != NULL)
c_mp->stat.st_pages++;
MPOOL_REGION_UNLOCK(dbenv, infop);
MVCC_BHALIGN(mfp, p);
/*
 * Also entered by goto when an evicted buffer of the same page size is
 * reused directly; on that path the region lock is not held either.
 */
found: if (offsetp != NULL)
*offsetp = R_OFFSET(infop, p);
*(void **)retp = p;
/*
 * Update the search statistics.  The region lock is not held here, so
 * these updates are not protected by it.
 */
#ifdef HAVE_STATISTICS
total_buckets += buckets;
if (total_buckets != 0) {
if (total_buckets > c_mp->stat.st_alloc_max_buckets)
c_mp->stat.st_alloc_max_buckets = total_buckets;
c_mp->stat.st_alloc_buckets += total_buckets;
}
if (buffers != 0) {
if (buffers > c_mp->stat.st_alloc_max_pages)
c_mp->stat.st_alloc_max_pages = buffers;
c_mp->stat.st_alloc_pages += buffers;
}
#endif
return (0);
} else if (giveup || c_mp->stat.st_pages == 0) {
/*
 * The allocation failed and either the search already decided to give
 * up or the cache holds no pages that could be evicted.
 */
MPOOL_REGION_UNLOCK(dbenv, infop);
__db_errx(dbenv,
"unable to allocate space from the buffer cache");
return (ret);
}
ret = 0;
/*
 * Start a new eviction pass: reset the freed-space counter and fold the
 * previous pass's bucket count into the running total.
 */
freed_space = 0;
total_buckets += buckets;
buckets = 0;
/*
 * Walk the hash buckets looking for buffers to evict.  The region lock is
 * held at the top of every iteration.
 */
for (;;) {
/*
 * No pages left in the cache: retry the allocation, which reports the
 * failure above if it still cannot be satisfied.
 */
if (c_mp->stat.st_pages == 0)
goto alloc;
/* Advance the shared scan position, wrapping to the first bucket. */
hp = &dbht[c_mp->last_checked++];
if (hp >= hp_end) {
c_mp->last_checked = 0;
hp = &dbht[c_mp->last_checked++];
}
/*
 * Once htab_buckets buckets have been examined in this pass, either
 * retry with whatever was freed or escalate.
 */
if (buckets++ == c_mp->htab_buckets) {
if (freed_space > 0)
goto alloc;
MPOOL_REGION_UNLOCK(dbenv, infop);
/*
 * Escalation levels:
 *   1     rescan with the priority filters below disabled
 *         (they are all conditioned on !aggressive);
 *   2     additionally remember put_counter, then as 3-6;
 *   3-6   call __memp_sync_int with DB_SYNC_ALLOC and sleep
 *         (arguments 1, 0 -- presumably one second) before rescanning;
 *   >6    drop back to level 1, and give up if put_counter has not
 *         changed since it was recorded at level 2.
 */
switch (++aggressive) {
case 1:
break;
case 2:
put_counter = c_mp->put_counter;
/* FALLTHROUGH */
case 3:
case 4:
case 5:
case 6:
(void)__memp_sync_int(
dbenv, NULL, 0, DB_SYNC_ALLOC, NULL, NULL);
__os_sleep(dbenv, 1, 0);
break;
default:
aggressive = 1;
if (put_counter == c_mp->put_counter)
giveup = 1;
break;
}
MPOOL_REGION_LOCK(dbenv, infop);
goto alloc;
}
/*
 * Skip empty buckets.  This and the next test read the bucket without
 * holding its mutex, which is only taken further down.
 */
if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
continue;
/*
 * Skip buckets whose priority is UINT32_MAX (presumably marking a
 * bucket with nothing evictable -- confirm where hash_priority is set).
 */
if (hp->hash_priority == UINT32_MAX)
continue;
if (!aggressive) {
/*
 * When lru_reset is non-zero and this bucket's index is at or beyond
 * it, compare using the priority less MPOOL_BASE_DECREMENT.
 */
priority = hp->hash_priority;
if (c_mp->lru_reset != 0 &&
c_mp->lru_reset <= hp - dbht)
priority -= MPOOL_BASE_DECREMENT;
/* Skip buckets above the threshold computed at entry. */
if (priority > high_priority)
continue;
/*
 * Candidates are considered in pairs: remember the first in hp_tmp,
 * and when the second arrives continue with whichever of the two has
 * the lower (adjusted) priority.
 */
if (hp_tmp == NULL) {
hp_tmp = hp;
continue;
}
if (c_mp->lru_reset &&
c_mp->lru_reset <= hp_tmp - dbht) {
if (priority > hp_tmp->hash_priority -
MPOOL_BASE_DECREMENT)
hp = hp_tmp;
} else if (priority > hp_tmp->hash_priority)
hp = hp_tmp;
hp_tmp = NULL;
}
/* Swap the region lock for the chosen bucket's mutex. */
MPOOL_REGION_UNLOCK(dbenv, infop);
MUTEX_LOCK(dbenv, hp->mtx_hash);
/* Remember the bucket's priority for the comparison below. */
priority = hp->hash_priority;
#ifdef DIAGNOSTIC
__memp_check_order(dbenv, hp);
#endif
/*
 * Take the first buffer in the bucket; the DIAGNOSTIC check above asserts
 * that priorities do not decrease along the bucket, so the first entry
 * has the lowest.  The bucket may have emptied since the unlocked test.
 * This label is re-entered after thawing a frozen buffer.
 */
this_hb: if ((bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) == NULL)
goto next_hb;
buffers++;
/* Find the MPOOLFILE the buffer belongs to. */
bh_mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
/*
 * Walk back along the buffer's version chain.  oldest_bhp ends up as the
 * last buffer in the chain; bhp moves to any earlier version whose ref
 * count and priority are both no greater than the current choice's.
 */
for (oldest_bhp = bhp, tbhp = SH_CHAIN_PREV(bhp, vc, __bh);
tbhp != NULL;
oldest_bhp = tbhp, tbhp = SH_CHAIN_PREV(tbhp, vc, __bh))
if (tbhp->ref <= bhp->ref &&
tbhp->priority <= bhp->priority)
bhp = tbhp;
/*
 * Prefer the last buffer in the chain when it is unreferenced and: the
 * current choice is frozen while it is not; or it is obsolete relative
 * to the cached hp->old_reader; or it becomes obsolete after refreshing
 * old_reader.  got_oldest is never cleared, so a successful refresh
 * happens at most once per call.
 */
if (oldest_bhp != bhp && oldest_bhp->ref == 0) {
if (F_ISSET(bhp, BH_FROZEN) &&
!F_ISSET(oldest_bhp, BH_FROZEN))
bhp = oldest_bhp;
else if (BH_OBSOLETE(oldest_bhp, hp->old_reader, vlsn))
bhp = oldest_bhp;
else if (!got_oldest &&
__txn_oldest_reader(dbenv, &hp->old_reader) == 0) {
got_oldest = 1;
if (BH_OBSOLETE(
oldest_bhp, hp->old_reader, vlsn))
bhp = oldest_bhp;
}
}
/*
 * A referenced buffer cannot be taken.  On a non-aggressive pass, also
 * pass over a buffer (other than the chain's last) whose priority is
 * above the bucket priority recorded after locking.
 */
if (bhp->ref != 0 || (bhp != oldest_bhp &&
!aggressive && bhp->priority > priority))
goto next_hb;
/*
 * If the buffer is dirty, write it, holding a reference across the call
 * (presumably so it cannot be taken during the I/O -- confirm).
 */
ret = 0;
if (F_ISSET(bhp, BH_DIRTY)) {
++bhp->ref;
ret = __memp_bhwrite(dbmp, hp, bh_mfp, bhp, 0);
--bhp->ref;
#ifdef HAVE_STATISTICS
if (ret == 0)
++c_mp->stat.st_rw_evict;
#endif
}
#ifdef HAVE_STATISTICS
else
++c_mp->stat.st_ro_evict;
#endif
/*
 * For multiversion files, a buffer that has earlier versions chained
 * behind it, or that is not obsolete relative to old_reader, is not
 * simply discarded.  old_reader is refreshed first if that has not
 * happened yet in this call.  Such a buffer is skipped unless this is an
 * aggressive pass and the buffer is neither frozen nor referenced, in
 * which case it is frozen; alloc_freeze is passed by address and is
 * tested below (presumably set when space for frozen headers is needed).
 */
if (ret == 0 && bh_mfp->multiversion) {
if (!got_oldest && !SH_CHAIN_HASPREV(bhp, vc) &&
!BH_OBSOLETE(bhp, hp->old_reader, vlsn)) {
(void)__txn_oldest_reader(dbenv,
&hp->old_reader);
got_oldest = 1;
}
if (SH_CHAIN_HASPREV(bhp, vc) ||
!BH_OBSOLETE(bhp, hp->old_reader, vlsn)) {
if (!aggressive ||
F_ISSET(bhp, BH_FROZEN) || bhp->ref != 0)
goto next_hb;
ret = __memp_bh_freeze(dbmp,
infop, hp, bhp, &alloc_freeze);
}
}
/*
 * The write or freeze failed, or the buffer has been referenced in the
 * meantime: move on.  On an aggressive pass with an error, call
 * __memp_bad_buffer, which moves the bucket's first buffer to the end of
 * the bucket so it is not the first candidate next time.
 */
if (ret != 0 || bhp->ref != 0) {
if (ret != 0 && aggressive)
__memp_bad_buffer(dbenv, hp);
goto next_hb;
}
/*
 * Dispose of the selected buffer, in one of four ways.
 *
 * NOTE(review): the __memp_bhfree error returns below leave without
 * touching hp->mtx_hash, and the successful paths skip the unlock at
 * next_hb; presumably __memp_bhfree releases the bucket mutex itself --
 * confirm against its definition.
 */
if (F_ISSET(bhp, BH_FROZEN)) {
/* Frozen: thaw it, then look at the bucket again. */
++bhp->ref;
if ((ret = __memp_bh_thaw(dbmp, infop, hp,
bhp, NULL)) != 0) {
MUTEX_UNLOCK(dbenv, hp->mtx_hash);
return (ret);
}
alloc_freeze = 0;
goto this_hb;
} else if (alloc_freeze) {
/*
 * Freezing asked for header space: free the buffer without
 * BH_FREE_FREEMEM and reuse its memory.  The start is linked onto
 * c_mp->alloc_frozen as a BH_FROZEN_ALLOC, and the remainder, up to
 * the end of the page buffer, is carved into BH_FROZEN_PAGE entries
 * appended to c_mp->free_frozen.  The region lock is re-acquired
 * before the lists are touched, as the loop top expects.
 */
if ((ret = __memp_bhfree(dbmp, infop, hp, bhp, 0)) != 0)
return (ret);
MVCC_MPROTECT(bhp->buf, bh_mfp->stat.st_pagesize,
PROT_READ | PROT_WRITE | PROT_EXEC);
MPOOL_REGION_LOCK(dbenv, infop);
SH_TAILQ_INSERT_TAIL(&c_mp->alloc_frozen,
(BH_FROZEN_ALLOC *)bhp, links);
frozen_bhp = (BH_FROZEN_PAGE *)
((BH_FROZEN_ALLOC *)bhp + 1);
endp = (u_int8_t *)bhp->buf + bh_mfp->stat.st_pagesize;
while ((u_int8_t *)(frozen_bhp + 1) < endp) {
SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen,
(BH *)frozen_bhp, hq);
frozen_bhp++;
}
alloc_freeze = 0;
continue;
} else if (mfp != NULL &&
mfp->stat.st_pagesize == bh_mfp->stat.st_pagesize) {
/*
 * Same page size as the request: free the buffer without
 * BH_FREE_FREEMEM and hand its memory straight to the caller.
 */
if ((ret = __memp_bhfree(dbmp, infop, hp, bhp, 0)) != 0)
return (ret);
p = bhp;
goto found;
} else {
/*
 * Different size: free the buffer with BH_FREE_FREEMEM and count
 * its size toward the retry threshold.
 */
freed_space += sizeof(*bhp) + bh_mfp->stat.st_pagesize;
if ((ret = __memp_bhfree(dbmp, infop,
hp, bhp, BH_FREE_FREEMEM)) != 0)
return (ret);
}
/* Space was freed; drop back to the lowest aggressive level. */
if (aggressive > 1)
aggressive = 1;
/*
 * The unlock is executed only when entered through "goto next_hb"; the
 * path falling through from the code above skips it.
 */
if (0) {
next_hb: MUTEX_UNLOCK(dbenv, hp->mtx_hash);
}
MPOOL_REGION_LOCK(dbenv, infop);
/* Retry the allocation once three times the request has been freed. */
if (freed_space >= 3 * len)
goto alloc;
}
}
/*
 * __memp_free --
 *	Return memory to a cache region's allocator.
 *
 * infop:	region the memory was allocated from.
 * mfp:		passed to MVCC_BHUNALIGN together with buf.
 * buf:		address to free.
 *
 * MVCC_BHUNALIGN is the counterpart of the MVCC_BHALIGN applied in
 * __memp_alloc; presumably it restores the address originally returned by
 * the region allocator -- confirm against the macro's definition.
 */
void
__memp_free(infop, mfp, buf)
REGINFO *infop;
MPOOLFILE *mfp;
void *buf;
{
MVCC_BHUNALIGN(mfp, buf);
/*
 * Presumably quiets an unused-parameter warning when MVCC_BHUNALIGN
 * expands to nothing; it must stay after the macro above, which uses mfp.
 */
COMPQUIET(mfp, NULL);
__env_alloc_free(infop, buf);
}
/*
 * __memp_bad_buffer --
 *	Move the first buffer in a hash bucket to the end of the bucket and
 *	give it, and every earlier version chained behind it, the priority of
 *	the buffer that was previously last.  The bucket's hash_priority is
 *	then recomputed from its new first buffer.
 *
 * dbenv:	environment handle; used only by the DIAGNOSTIC check.
 * hp:		hash bucket to reorder.  Nothing is done when the first
 *		buffer is also the last one.
 */
static void
__memp_bad_buffer(dbenv, hp)
	DB_ENV *dbenv;
	DB_MPOOL_HASH *hp;
{
	BH *head, *tail, *version;
	u_int32_t worst;

	head = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
	tail = SH_TAILQ_LASTP(&hp->hash_bucket, hq, __bh);

	/* Reordering only makes sense with more than one buffer. */
	if (head != tail) {
		/* Unlink the first buffer and re-append it after the last. */
		SH_TAILQ_REMOVE(&hp->hash_bucket, head, hq, __bh);
		worst = BH_PRIORITY(tail);
		SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, head, hq);

		/* Apply the last buffer's priority to the whole chain. */
		version = head;
		while (version != NULL) {
			version->priority = worst;
			version = SH_CHAIN_PREV(version, vc, __bh);
		}

		/* The bucket now starts with a different buffer. */
		hp->hash_priority = BH_PRIORITY(
		    SH_TAILQ_FIRSTP(&hp->hash_bucket, __bh));

#ifdef DIAGNOSTIC
		__memp_check_order(dbenv, hp);
#else
		COMPQUIET(dbenv, NULL);
#endif
	}
}
#ifdef DIAGNOSTIC
/*
 * __memp_check_order --
 *	Consistency check of one hash bucket, compiled only under DIAGNOSTIC.
 *
 * Asserts that:
 *	- no buffer on the bucket list has a successor in its version chain;
 *	- the first buffer's priority equals hp->hash_priority and priorities
 *	  never decrease along the list;
 *	- each version chain is consistently linked in both directions;
 *	- no two buffers on the list share both pgno and mf_offset;
 *	- the number of BH_DIRTY buffers on the list equals
 *	  hp->hash_page_dirty.
 *
 * dbenv:	environment handle handed to DB_ASSERT.
 * hp:		hash bucket to check.
 *
 * The local names and assertion expressions are kept as they were, in case
 * DB_ASSERT reports the expression text on failure.
 */
void
__memp_check_order(dbenv, hp)
	DB_ENV *dbenv;
	DB_MPOOL_HASH *hp;
{
	BH *bhp, *first_bhp, *tbhp;
	u_int32_t dirty, priority, last_priority;

	first_bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
	last_priority = hp->hash_priority;
	dirty = 0;

	bhp = first_bhp;
	while (bhp != NULL) {
		/* A listed buffer has no successor in its version chain. */
		DB_ASSERT(dbenv, !SH_CHAIN_HASNEXT(bhp, vc));

		if (F_ISSET(bhp, BH_DIRTY))
			dirty++;

		/* Priorities start at hash_priority and never go down. */
		priority = BH_PRIORITY(bhp);
		DB_ASSERT(dbenv, (bhp == first_bhp) ?
		    priority == last_priority : priority >= last_priority);
		last_priority = priority;

		/* Each earlier version's successor must link back to it. */
		tbhp = SH_CHAIN_PREV(bhp, vc, __bh);
		while (tbhp != NULL) {
			DB_ASSERT(dbenv, tbhp == SH_CHAIN_PREV(
			    SH_CHAIN_NEXT(tbhp, vc, __bh), vc, __bh));
			tbhp = SH_CHAIN_PREV(tbhp, vc, __bh);
		}

		/* No later list entry refers to the same page of the same file. */
		tbhp = SH_TAILQ_NEXT(bhp, hq, __bh);
		while (tbhp != NULL) {
			DB_ASSERT(dbenv, bhp->pgno != tbhp->pgno ||
			    bhp->mf_offset != tbhp->mf_offset);
			tbhp = SH_TAILQ_NEXT(tbhp, hq, __bh);
		}

		bhp = SH_TAILQ_NEXT(bhp, hq, __bh);
	}

	DB_ASSERT(dbenv, dirty == hp->hash_page_dirty);
}
#endif