#include "db_config.h"
#include "db_int.h"
#include "dbinc/mp.h"
/* Forward declarations for the file-local helpers defined later in this file. */
static int __memp_init_config __P((ENV *, MPOOL *));
static void __memp_region_size __P((ENV *, roff_t *, u_int32_t *));
/*
 * __memp_open --
 *	Create or join the memory pool regions for this environment and
 *	install the resulting per-process DB_MPOOL handle in env->mp_handle.
 *
 * env:		environment whose cache regions are being opened.
 * create_ok:	when non-zero, REGION_CREATE_OK is added to the flags used
 *		to attach the primary region.
 *
 * Returns 0 on success, otherwise the error code of the failing allocation,
 * region attach, region initialization or mutex call.  On the "err" path
 * every region attached so far is detached and the handle is freed.
 */
int
__memp_open(env, create_ok)
	ENV *env;
	int create_ok;
{
	DB_ENV *dbenv;
	DB_MPOOL *dbmp;
	MPOOL *mp;
	REGINFO reginfo;
	roff_t reg_size;
	u_int i, max_nreg;
	u_int32_t htab_buckets, *regids;
	int primary_attached, ret;

	dbenv = env->dbenv;
	primary_attached = 0;

	/* Size of one cache region and the hash bucket count per region. */
	__memp_region_size(env, &reg_size, &htab_buckets);

	/* Allocate the zero-filled per-process handle and its queues. */
	if ((ret = __os_calloc(env, 1, sizeof(*dbmp), &dbmp)) != 0)
		return (ret);
	LIST_INIT(&dbmp->dbregq);
	TAILQ_INIT(&dbmp->dbmfq);
	dbmp->env = env;

	/*
	 * Attach the primary region through a local REGINFO; it is copied
	 * into slot 0 of the handle's REGINFO array once that array exists.
	 */
	memset(&reginfo, 0, sizeof(REGINFO));
	reginfo.env = env;
	reginfo.type = REGION_TYPE_MPOOL;
	reginfo.id = INVALID_REGION_ID;
	reginfo.flags = REGION_JOIN_OK;
	if (create_ok)
		F_SET(&reginfo, REGION_CREATE_OK);
	if ((ret = __env_region_attach(env, &reginfo, reg_size)) != 0)
		goto err;
	primary_attached = 1;

	if (F_ISSET(&reginfo, REGION_CREATE)) {
		/*
		 * REGION_CREATE is set on the primary REGINFO: size the
		 * REGINFO array for the maximum number of regions and
		 * initialize every configured cache region.
		 */
		max_nreg = __memp_max_regions(env);
		if ((ret = __os_calloc(env,
		    max_nreg, sizeof(REGINFO), &dbmp->reginfo)) != 0)
			goto err;
		dbmp->reginfo[0] = reginfo;
		/* Mark unused slots so the error path skips them. */
		for (i = 1; i < max_nreg; ++i)
			dbmp->reginfo[i].id = INVALID_REGION_ID;

		/* Initialize the primary region first. */
		if ((ret = __memp_init(env, dbmp,
		    0, htab_buckets, max_nreg)) != 0)
			goto err;

		/* The region id table is addressed relative to region 0. */
		mp = R_ADDR(dbmp->reginfo, dbmp->reginfo[0].rp->primary);
		regids = R_ADDR(dbmp->reginfo, mp->regids);
		regids[0] = dbmp->reginfo[0].id;

		/* Attach and initialize the remaining cache regions. */
		for (i = 1; i < dbenv->mp_ncache; ++i) {
			dbmp->reginfo[i].env = env;
			dbmp->reginfo[i].type = REGION_TYPE_MPOOL;
			dbmp->reginfo[i].id = INVALID_REGION_ID;
			dbmp->reginfo[i].flags = REGION_CREATE_OK;
			if ((ret = __env_region_attach(
			    env, &dbmp->reginfo[i], reg_size)) != 0)
				goto err;
			if ((ret = __memp_init(env, dbmp,
			    i, htab_buckets, max_nreg)) != 0)
				goto err;

			/* Record the id so other handles can attach by id. */
			regids[i] = dbmp->reginfo[i].id;
		}
	} else {
		/*
		 * REGION_CREATE is not set: take the region count from the
		 * existing primary MPOOL and attach to each region by the
		 * id stored in the region id table.
		 */
		mp = R_ADDR(&reginfo, reginfo.rp->primary);
		dbenv->mp_ncache = mp->nreg;
		if ((ret = __os_calloc(env,
		    mp->max_nreg, sizeof(REGINFO), &dbmp->reginfo)) != 0)
			goto err;
		/* Mark every slot invalid before any attach can fail. */
		for (i = 0; i < dbenv->mp_ncache; ++i)
			dbmp->reginfo[i].id = INVALID_REGION_ID;
		dbmp->reginfo[0] = reginfo;

		regids = R_ADDR(dbmp->reginfo, mp->regids);
		for (i = 1; i < dbenv->mp_ncache; ++i) {
			dbmp->reginfo[i].env = env;
			dbmp->reginfo[i].type = REGION_TYPE_MPOOL;
			dbmp->reginfo[i].id = regids[i];
			dbmp->reginfo[i].flags = REGION_JOIN_OK;
			if ((ret = __env_region_attach(
			    env, &dbmp->reginfo[i], 0)) != 0)
				goto err;
		}
	}

	/* Cache the local address of each region's primary structure. */
	for (i = 0; i < dbenv->mp_ncache; ++i)
		dbmp->reginfo[i].primary =
		    R_ADDR(&dbmp->reginfo[i], dbmp->reginfo[i].rp->primary);

	/* Allocate the process-only mutex stored in the handle. */
	if ((ret = __mutex_alloc(env,
	    MTX_MPOOL_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbmp->mutex)) != 0)
		goto err;

	env->mp_handle = dbmp;

	/*
	 * Apply this process's mpool configuration to the region.  Note that
	 * a failure here returns with the handle still installed in
	 * env->mp_handle rather than taking the cleanup path below.
	 */
	if ((ret = __memp_init_config(env, mp)) != 0)
		return (ret);

	return (0);

err:	env->mp_handle = NULL;
	if (dbmp->reginfo != NULL && dbmp->reginfo[0].addr != NULL) {
		/* Detach every region whose slot holds a valid id. */
		for (i = 0; i < dbenv->mp_ncache; ++i)
			if (dbmp->reginfo[i].id != INVALID_REGION_ID)
				(void)__env_region_detach(
				    env, &dbmp->reginfo[i], 0);
		__os_free(env, dbmp->reginfo);
	} else if (dbmp->reginfo == NULL && primary_attached) {
		/*
		 * The primary region was attached but the REGINFO array was
		 * never allocated, so the loop above cannot reach it; detach
		 * it through the local REGINFO so it is not leaked.
		 */
		(void)__env_region_detach(env, &reginfo, 0);
	}

	(void)__mutex_free(env, &dbmp->mutex);
	__os_free(env, dbmp);
	return (ret);
}
/*
 * __memp_init --
 *	Initialize the MPOOL structure inside the cache region described by
 *	dbmp->reginfo[reginfo_off].
 *
 * env:		owning environment.
 * dbmp:	per-process handle whose reginfo[] array holds the regions.
 * reginfo_off:	index of the region to initialize; index 0 additionally
 *		sets up the state shared by all regions.
 * htab_buckets: number of hash buckets to create in this region.
 * max_nreg:	maximum number of regions; sizes the region id table and
 *		the pool of hash bucket mutexes.
 *
 * Returns 0 on success or the error from __env_alloc / __mutex_alloc.
 * Allocation failures log a message via the mem_err label; mutex failures
 * return directly without a message.
 */
int
__memp_init(env, dbmp, reginfo_off, htab_buckets, max_nreg)
ENV *env;
DB_MPOOL *dbmp;
u_int reginfo_off, max_nreg;
u_int32_t htab_buckets;
{
BH *frozen_bhp;
BH_FROZEN_ALLOC *frozen;
DB_ENV *dbenv;
DB_MPOOL_HASH *htab, *hp;
MPOOL *mp, *main_mp;
REGINFO *infop;
db_mutex_t mtx_base, mtx_discard, mtx_prev;
u_int32_t i;
int ret;
void *p;
dbenv = env->dbenv;
infop = &dbmp->reginfo[reginfo_off];
/* Allocate the MPOOL structure from the region and record its offset. */
if ((ret = __env_alloc(infop, sizeof(MPOOL), &infop->primary)) != 0)
goto mem_err;
infop->rp->primary = R_OFFSET(infop, infop->primary);
mp = infop->primary;
memset(mp, 0, sizeof(*mp));
/* Mutex stored in the region's MPOOL structure. */
if ((ret =
__mutex_alloc(env, MTX_MPOOL_REGION, 0, &mp->mtx_region)) != 0)
return (ret);
if (reginfo_off == 0) {
/* Region 0 only: state shared across every cache region. */
ZERO_LSN(mp->lsn);
mp->nreg = dbenv->mp_ncache;
mp->max_nreg = max_nreg;
/* Region id table with one u_int32_t slot per possible region. */
if ((ret = __env_alloc(&dbmp->reginfo[0],
max_nreg * sizeof(u_int32_t), &p)) != 0)
goto mem_err;
mp->regids = R_OFFSET(dbmp->reginfo, p);
mp->nbuckets = dbenv->mp_ncache * htab_buckets;
/* File hash table: MPOOL_FILE_BUCKETS buckets, each with its own mutex. */
if ((ret = __env_alloc(infop,
MPOOL_FILE_BUCKETS * sizeof(DB_MPOOL_HASH), &htab)) != 0)
goto mem_err;
mp->ftab = R_OFFSET(infop, htab);
for (i = 0; i < MPOOL_FILE_BUCKETS; i++) {
if ((ret = __mutex_alloc(env,
MTX_MPOOL_FILE_BUCKET, 0, &htab[i].mtx_hash)) != 0)
return (ret);
SH_TAILQ_INIT(&htab[i].hash_bucket);
htab[i].hash_page_dirty = 0;
}
/*
 * Allocate two mutexes (hash bucket, then I/O) for each of the
 * max_nreg * htab_buckets possible buckets.  Only the first id is kept
 * in mtx_base; the assertions check that each new id is the previous
 * one plus one, which is what lets the per-bucket ids be computed
 * arithmetically from mtx_base further down.
 */
mtx_base = mtx_prev = MUTEX_INVALID;
for (i = 0; i < mp->max_nreg * htab_buckets; i++) {
if ((ret = __mutex_alloc(env, MTX_MPOOL_HASH_BUCKET,
0, &mtx_discard)) != 0)
return (ret);
if (i == 0) {
/* Remember the first id and prime mtx_prev for the assertion. */
mtx_base = mtx_discard;
mtx_prev = mtx_discard - 1;
}
DB_ASSERT(env, mtx_discard == mtx_prev + 1 ||
mtx_base == MUTEX_INVALID);
mtx_prev = mtx_discard;
if ((ret = __mutex_alloc(env, MTX_MPOOL_IO,
DB_MUTEX_SELF_BLOCK, &mtx_discard)) != 0)
return (ret);
DB_ASSERT(env, mtx_discard == mtx_prev + 1 ||
mtx_base == MUTEX_INVALID);
mtx_prev = mtx_discard;
}
} else {
/* Other regions: take the base mutex id from region 0's first bucket. */
main_mp = dbmp->reginfo[0].primary;
htab = R_ADDR(&dbmp->reginfo[0], main_mp->htab);
mtx_base = htab[0].mtx_hash;
}
/* Skip past the mutex pairs belonging to the lower-numbered regions. */
if (mtx_base != MUTEX_INVALID)
mtx_base += reginfo_off * htab_buckets * 2;
/* Allocate and initialize this region's buffer hash table. */
if ((ret = __env_alloc(infop,
htab_buckets * sizeof(DB_MPOOL_HASH), &htab)) != 0)
goto mem_err;
mp->htab = R_OFFSET(infop, htab);
for (i = 0; i < htab_buckets; i++) {
hp = &htab[i];
/* Bucket i uses ids base + 2i (hash) and base + 2i + 1 (I/O). */
hp->mtx_hash = (mtx_base == MUTEX_INVALID) ? MUTEX_INVALID :
mtx_base + i * 2;
hp->mtx_io = (mtx_base == MUTEX_INVALID) ? MUTEX_INVALID :
mtx_base + i * 2 + 1;
SH_TAILQ_INIT(&hp->hash_bucket);
hp->hash_page_dirty = 0;
#ifdef HAVE_STATISTICS
hp->hash_io_wait = 0;
hp->hash_frozen = hp->hash_thawed = hp->hash_frozen_freed = 0;
#endif
hp->flags = 0;
ZERO_LSN(hp->old_reader);
}
mp->htab_buckets = htab_buckets;
#ifdef HAVE_STATISTICS
mp->stat.st_hash_buckets = htab_buckets;
#endif
/*
 * Seed the frozen-buffer lists with a single allocation: a
 * BH_FROZEN_ALLOC header immediately followed by room for one
 * BH_FROZEN_PAGE.  The header goes on alloc_frozen and the buffer
 * header that follows it goes on free_frozen.
 */
SH_TAILQ_INIT(&mp->free_frozen);
SH_TAILQ_INIT(&mp->alloc_frozen);
if ((ret = __env_alloc(infop,
sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE), &frozen)) != 0)
goto mem_err;
frozen_bhp = (BH *)(frozen + 1);
SH_TAILQ_INSERT_TAIL(&mp->alloc_frozen, frozen, links);
SH_TAILQ_INSERT_TAIL(&mp->free_frozen, frozen_bhp, hq);
/* Record the configured cache size in the region statistics. */
mp->stat.st_gbytes = dbenv->mp_gbytes;
mp->stat.st_bytes = dbenv->mp_bytes;
return (0);
/* Shared exit for region allocation failures: report, then return ret. */
mem_err:__db_errx(env, "Unable to allocate memory for mpool region");
return (ret);
}
u_int32_t
__memp_max_regions(env)
ENV *env;
{
DB_ENV *dbenv;
roff_t reg_size, max_size;
size_t max_nreg;
dbenv = env->dbenv;
__memp_region_size(env, ®_size, NULL);
max_size =
(roff_t)dbenv->mp_max_gbytes * GIGABYTE + dbenv->mp_max_bytes;
max_nreg = (max_size + reg_size / 2) / reg_size;
DB_ASSERT(env, max_nreg == (u_int32_t)max_nreg);
if (max_nreg <= dbenv->mp_ncache)
max_nreg = dbenv->mp_ncache;
return ((u_int32_t)max_nreg);
}
static void
__memp_region_size(env, reg_sizep, htab_bucketsp)
ENV *env;
roff_t *reg_sizep;
u_int32_t *htab_bucketsp;
{
DB_ENV *dbenv;
roff_t reg_size, cache_size;
dbenv = env->dbenv;
cache_size = (roff_t)dbenv->mp_gbytes * GIGABYTE + dbenv->mp_bytes;
reg_size = cache_size / dbenv->mp_ncache;
if (reg_sizep != NULL)
*reg_sizep = reg_size;
if (htab_bucketsp != NULL)
*htab_bucketsp =
__db_tablesize((u_int32_t)(reg_size / (10 * 1024)));
}
/*
 * __memp_region_mutex_count --
 *	Return the number of mutexes the memory pool asks for: two per hash
 *	bucket in each configured cache, one per file hash bucket
 *	(MPOOL_FILE_BUCKETS), plus a fixed allowance of 50.
 */
u_int32_t
__memp_region_mutex_count(env)
	ENV *env;
{
	u_int32_t buckets_per_cache;

	/* Only the per-region bucket count is of interest. */
	__memp_region_size(env, NULL, &buckets_per_cache);

	return (env->dbenv->mp_ncache * buckets_per_cache * 2 +
	    50 + MPOOL_FILE_BUCKETS);
}
/*
 * __memp_init_config --
 *	Copy each mpool tuning value that is non-zero in the DB_ENV
 *	configuration into the MPOOL structure, leaving the MPOOL value
 *	untouched when the configured value is zero.  The copies are made
 *	between MPOOL_SYSTEM_LOCK and MPOOL_SYSTEM_UNLOCK.
 *
 *	Returns 0 on the path visible in this function.
 */
static int
__memp_init_config(env, mp)
	ENV *env;
	MPOOL *mp;
{
	DB_ENV *dbenv;

	dbenv = env->dbenv;

	MPOOL_SYSTEM_LOCK(env);

	/* Write-throttling settings. */
	if (dbenv->mp_maxwrite_sleep) {
		mp->mp_maxwrite_sleep = dbenv->mp_maxwrite_sleep;
	}
	if (dbenv->mp_maxwrite) {
		mp->mp_maxwrite = dbenv->mp_maxwrite;
	}

	/* Open file descriptor limit. */
	if (dbenv->mp_maxopenfd) {
		mp->mp_maxopenfd = dbenv->mp_maxopenfd;
	}

	/* Memory-map size setting. */
	if (dbenv->mp_mmapsize) {
		mp->mp_mmapsize = dbenv->mp_mmapsize;
	}

	MPOOL_SYSTEM_UNLOCK(env);

	return (0);
}
int
__memp_env_refresh(env)
ENV *env;
{
BH *bhp;
BH_FROZEN_ALLOC *frozen_alloc;
DB_MPOOL *dbmp;
DB_MPOOLFILE *dbmfp;
DB_MPOOL_HASH *hp;
DB_MPREG *mpreg;
MPOOL *mp, *c_mp;
REGINFO *infop;
db_mutex_t mtx_base, mtx;
u_int32_t bucket, htab_buckets, i, max_nreg, nreg;
int ret, t_ret;
ret = 0;
dbmp = env->mp_handle;
mp = dbmp->reginfo[0].primary;
htab_buckets = mp->htab_buckets;
nreg = mp->nreg;
max_nreg = mp->max_nreg;
hp = R_ADDR(&dbmp->reginfo[0], mp->htab);
mtx_base = hp->mtx_hash;
if (!F_ISSET(env, ENV_PRIVATE))
goto not_priv;
for (i = 0; i < nreg; ++i) {
infop = &dbmp->reginfo[i];
c_mp = infop->primary;
for (hp = R_ADDR(infop, c_mp->htab), bucket = 0;
bucket < c_mp->htab_buckets; ++hp, ++bucket) {
while ((bhp = SH_TAILQ_FIRST(
&hp->hash_bucket, __bh)) != NULL)
if (F_ISSET(bhp, BH_FROZEN))
SH_TAILQ_REMOVE(
&hp->hash_bucket, bhp,
hq, __bh);
else {
if (F_ISSET(bhp, BH_DIRTY)) {
--hp->hash_page_dirty;
F_CLR(bhp,
BH_DIRTY | BH_DIRTY_CREATE);
}
if ((t_ret = __memp_bhfree(
dbmp, infop, hp, bhp,
BH_FREE_FREEMEM |
BH_FREE_UNLOCKED)) != 0 && ret == 0)
ret = t_ret;
}
}
while ((frozen_alloc = SH_TAILQ_FIRST(
&c_mp->alloc_frozen, __bh_frozen_a)) != NULL) {
SH_TAILQ_REMOVE(&c_mp->alloc_frozen, frozen_alloc,
links, __bh_frozen_a);
__env_alloc_free(infop, frozen_alloc);
}
}
if (mtx_base != MUTEX_INVALID)
for (i = 0; i < 2 * max_nreg * htab_buckets; ++i) {
mtx = mtx_base + i;
if ((t_ret = __mutex_free(env, &mtx)) != 0 &&
ret == 0)
ret = t_ret;
}
not_priv:
while ((dbmfp = TAILQ_FIRST(&dbmp->dbmfq)) != NULL)
if ((t_ret = __memp_fclose(dbmfp, 0)) != 0 && ret == 0)
ret = t_ret;
if (dbmp->pg_inout != NULL)
__os_free(env, dbmp->pg_inout);
while ((mpreg = LIST_FIRST(&dbmp->dbregq)) != NULL) {
LIST_REMOVE(mpreg, q);
__os_free(env, mpreg);
}
if ((t_ret = __mutex_free(env, &dbmp->mutex)) != 0 && ret == 0)
ret = t_ret;
if (F_ISSET(env, ENV_PRIVATE)) {
infop = &dbmp->reginfo[0];
__memp_free(infop, NULL, R_ADDR(infop, mp->regids));
__memp_free(infop, NULL, R_ADDR(infop, mp->ftab));
for (i = 0; i < nreg; ++i) {
infop = &dbmp->reginfo[i];
c_mp = infop->primary;
__memp_free(infop, NULL, R_ADDR(infop, c_mp->htab));
}
}
for (i = 0; i < nreg; ++i) {
infop = &dbmp->reginfo[i];
if ((t_ret =
__env_region_detach(env, infop, 0)) != 0 && ret == 0)
ret = t_ret;
}
__os_free(env, dbmp->reginfo);
__os_free(env, dbmp);
env->mp_handle = NULL;
return (ret);
}