#include "db_config.h"
#ifndef lint
static const char copyright[] =
"Copyright (c) 1996-2003\nSleepycat Software Inc. All rights reserved.\n";
static const char revid[] =
"$Id: env_recover.c,v 1.2 2004/03/30 01:23:16 jtownsen Exp $";
#endif
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#if TIME_WITH_SYS_TIME
#include <sys/time.h>
#include <time.h>
#else
#if HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <time.h>
#endif
#endif
#include <string.h>
#endif
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_shash.h"
#include "dbinc/log.h"
#include "dbinc/txn.h"
#include "dbinc/mp.h"
#include "dbinc/db_am.h"
/*
 * File-local helpers used by __db_apprec and __env_openfiles:
 *	__log_backup	-- pick the starting LSN when rolling back to max_lsn.
 *	__log_earliest	-- find the first usable checkpoint LSN and timestamp.
 *	__lsn_diff	-- fractional number of log files between two LSNs.
 */
static int __log_backup __P((DB_ENV *, DB_LOGC *, DB_LSN *, DB_LSN *));
static int __log_earliest __P((DB_ENV *, DB_LOGC *, int32_t *, DB_LSN *));
static double __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int));
/*
 * __db_apprec --
 *	Run recovery over the environment's log.
 *
 * The function makes up to three passes over the log, all driven through
 * __db_dispatch with dbenv->recover_dtab:
 *	1. an "open files" pass (__env_openfiles) from first_lsn forward,
 *	2. a backward pass from the last record down to first_lsn,
 *	3. a forward pass from first_lsn up to stop_lsn.
 * If a feedback callback is installed it is told 0-33, 34-67 and 67-100
 * (percent) across those passes.
 *
 * Parameters:
 *	dbenv	 -- environment being recovered.
 *	max_lsn	 -- if non-NULL, the starting point is chosen by __log_backup,
 *		    and afterwards the log is truncated at this LSN and the
 *		    open-files pass is re-run on the truncated log.
 *	trunclsn -- passed through to __log_vtruncate.
 *	update	 -- if non-zero and a replication region exists, its timestamp
 *		    is set to the current time.
 *	flags	 -- DB_RECOVER_FATAL makes recovery start at the first log
 *		    record instead of at the most recent checkpoint.
 *
 * Returns 0 on success (including an empty log) or an error code.  On every
 * exit path dbenv->tx_timestamp is reset to 0, DB_ENV_THREAD is restored and
 * the DBLOG_RECOVER / TXN_IN_RECOVERY flags are cleared.
 */
int
__db_apprec(dbenv, max_lsn, trunclsn, update, flags)
	DB_ENV *dbenv;
	DB_LSN *max_lsn, *trunclsn;
	u_int32_t update, flags;
{
	DBT data;
	DB_LOGC *logc;
	DB_LSN ckp_lsn, first_lsn, last_lsn, lowlsn, lsn, stop_lsn;
	DB_REP *db_rep;
	DB_TXNREGION *region;
	REP *rep;
	__txn_ckp_args *ckp_args;
	time_t now, tlow;
	int32_t log_size, low;
	double nfiles;
	int have_rec, is_thread, progress, ret, t_ret;
	int (**dtab) __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
	u_int32_t hi_txn, txnid;
	char *p, *pass, t1[60], t2[60];
	void *txninfo;

	/* nfiles is only meaningful when a feedback callback is installed. */
	COMPQUIET(nfiles, (double)0);

	logc = NULL;
	ckp_args = NULL;
	/* dtab is never assigned below; the matching free at exit is inert. */
	dtab = NULL;
	hi_txn = TXN_MAXIMUM;
	txninfo = NULL;
	pass = "initial";

	log_size =
	    ((LOG *)(((DB_LOG *)dbenv->lg_handle)->reginfo.primary))->log_size;

	/*
	 * Remember and clear the threading flag for the duration of the
	 * call; it is restored at exit.  (Presumably recovery is expected to
	 * run single-threaded -- confirm against the caller.)
	 */
	is_thread = F_ISSET(dbenv, DB_ENV_THREAD) ? 1 : 0;
	F_CLR(dbenv, DB_ENV_THREAD);

	/* If requested, stamp the replication region with the current time. */
	if (update && (db_rep = dbenv->rep_handle) != NULL &&
	    (rep = db_rep->region) != NULL)
		(void)time(&rep->timestamp);

	/* Mark the log and transaction regions as being in recovery. */
	F_SET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
	region = ((DB_TXNMGR *)dbenv->tx_handle)->reginfo.primary;
	F_SET(region, TXN_IN_RECOVERY);

	if ((ret = __log_cursor(dbenv, &logc)) != 0)
		goto err;

	/*
	 * Determine lowlsn, the lowest LSN we may need to start from, when
	 * rolling back to max_lsn or recovering to a timestamp.
	 */
	ZERO_LSN(lowlsn);
	if (max_lsn != NULL) {
		if ((ret = __log_backup(dbenv, logc, max_lsn, &lowlsn)) != 0)
			goto err;
	} else if (dbenv->tx_timestamp != 0) {
		if ((ret = __log_earliest(dbenv, logc, &low, &lowlsn)) != 0)
			goto err;
		/* Reject a timestamp older than the earliest checkpoint. */
		if ((int32_t)dbenv->tx_timestamp < low) {
			(void)snprintf(t1, sizeof(t1),
			    "%s", ctime(&dbenv->tx_timestamp));
			if ((p = strchr(t1, '\n')) != NULL)
				*p = '\0';
			tlow = (time_t)low;
			(void)snprintf(t2, sizeof(t2), "%s", ctime(&tlow));
			if ((p = strchr(t2, '\n')) != NULL)
				*p = '\0';
			__db_err(dbenv,
		    "Invalid recovery timestamp %s; earliest time is %s",
			    t1, t2);
			ret = EINVAL;
			goto err;
		}
	}

#ifdef UMRW
	ZERO_LSN(last_lsn);
#endif
	/* Find the last log record; an empty log means nothing to do. */
	memset(&data, 0, sizeof(data));
	if ((ret = __log_c_get(logc, &last_lsn, &data, DB_LAST)) != 0) {
		if (ret == DB_NOTFOUND)
			ret = 0;
		else
			__db_err(dbenv, "Last log record not found");
		goto err;
	}

	/*
	 * Walk backward from the end of the log until a record with a
	 * non-zero transaction ID is found.  The ID is the u_int32_t that
	 * follows the leading u_int32_t of the record.  If none exists,
	 * hi_txn ends up 0.
	 */
	do {
		memcpy(&txnid,
		    (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
		if (txnid != 0)
			break;
	} while ((ret = __log_c_get(logc, &lsn, &data, DB_PREV)) == 0);

	if (ret == DB_NOTFOUND)
		ret = 0;
	else if (ret != 0)
		goto err;

	hi_txn = txnid;

	/* Default starting point: the very first record in the log. */
	if ((ret = __log_c_get(logc, &ckp_lsn, &data, DB_FIRST)) != 0) {
		if (ret == DB_NOTFOUND)
			ret = 0;
		else
			__db_err(dbenv, "First log record not found");
		goto err;
	}
	first_lsn = ckp_lsn;
	have_rec = 1;

	if (!LF_ISSET(DB_RECOVER_FATAL)) {
		/*
		 * Non-fatal recovery: if a checkpoint is recorded and its
		 * record can be read, start from the ckp_lsn stored in it.
		 */
		if ((ret = __txn_getckp(dbenv, &ckp_lsn)) == 0 &&
		    (ret = __log_c_get(logc, &ckp_lsn, &data, DB_SET)) == 0) {
			if ((ret = __txn_ckp_read(dbenv,
			    data.data, &ckp_args)) != 0) {
				__db_err(dbenv,
			    "Invalid checkpoint record at [%lu][%lu]",
				    (u_long)ckp_lsn.file,
				    (u_long)ckp_lsn.offset);
				goto err;
			}
			first_lsn = ckp_args->ckp_lsn;
			have_rec = 0;
		}

		/*
		 * When rolling back to max_lsn or to a timestamp, start no
		 * later than lowlsn.
		 */
		if ((dbenv->tx_timestamp != 0 || max_lsn != NULL) &&
		    log_compare(&lowlsn, &first_lsn) < 0) {
			DB_ASSERT(have_rec == 0);
			first_lsn = lowlsn;
		}
	}

	/* Position the cursor on first_lsn unless it is already there. */
	if (!have_rec &&
	    (ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0) {
		__db_err(dbenv, "Checkpoint LSN record [%lu][%lu] not found",
		    (u_long)first_lsn.file, (u_long)first_lsn.offset);
		goto err;
	}

	/*
	 * For progress reporting, compute how many log files' worth of data
	 * lies between first_lsn and last_lsn; never let it be 0 because it
	 * is used as a divisor.
	 */
	if (dbenv->db_feedback != NULL) {
		if (last_lsn.file == first_lsn.file)
			nfiles = (double)
			    (last_lsn.offset - first_lsn.offset) / log_size;
		else
			nfiles = (double)(last_lsn.file - first_lsn.file) +
			    (double)(log_size - first_lsn.offset +
			    last_lsn.offset) / log_size;
		if (nfiles == 0)
			nfiles = (double)0.001;
	}

	/*
	 * Scan forward from first_lsn for the first record with a non-zero
	 * transaction ID (skipped when the log holds none), then reposition
	 * on first_lsn.  txnid and hi_txn seed the transaction list.
	 */
	ret = 0;
	if (hi_txn != 0) do {
		memcpy(&txnid,
		    (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
		if (txnid != 0)
			break;
	} while ((ret = __log_c_get(logc, &lsn, &data, DB_NEXT)) == 0);

	if (ret == DB_NOTFOUND)
		ret = 0;
	if (ret != 0 ||
	    (ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0)
		goto err;

	if ((ret =
	    __db_txnlist_init(dbenv, txnid, hi_txn, max_lsn, &txninfo)) != 0)
		goto err;

	/* Pass #1: open files, forward from first_lsn. */
	if ((ret = __env_openfiles(dbenv, logc,
	    txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
		goto err;

	/*
	 * No transactional records and no rollback target: skip the
	 * backward/forward passes, the checkpoint and the file close.
	 */
	if (hi_txn == 0 && max_lsn == NULL)
		goto done;

	if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
		__db_err(dbenv, "Recovery starting from [%lu][%lu]",
		    (u_long)first_lsn.file, (u_long)first_lsn.offset);

	/* Pass #2: backward, from the last record down to first_lsn. */
	pass = "backward";
	for (ret = __log_c_get(logc, &lsn, &data, DB_LAST);
	    ret == 0 && log_compare(&lsn, &first_lsn) >= 0;
	    ret = __log_c_get(logc, &lsn, &data, DB_PREV)) {
		if (dbenv->db_feedback != NULL) {
			progress = 34 + (int)(33 * (__lsn_diff(&first_lsn,
			    &last_lsn, &lsn, log_size, 0) / nfiles));
			dbenv->db_feedback(dbenv, DB_RECOVER, progress);
		}
		ret = __db_dispatch(dbenv, dbenv->recover_dtab,
		    dbenv->recover_dtab_size, &data, &lsn,
		    DB_TXN_BACKWARD_ROLL, txninfo);
		if (ret != 0) {
			/* DB_TXN_CKP from dispatch is not an error. */
			if (ret != DB_TXN_CKP)
				goto msgerr;
			else
				ret = 0;
		}
	}
	if (ret != 0 && ret != DB_NOTFOUND)
		goto err;

	/*
	 * Pass #3: forward, continuing from where the backward pass left
	 * the cursor, up to stop_lsn.  When rolling back to an LSN or a
	 * timestamp the stop point comes from the transaction list.
	 */
	pass = "forward";
	stop_lsn = last_lsn;
	if (max_lsn != NULL || dbenv->tx_timestamp != 0)
		stop_lsn = ((DB_TXNHEAD *)txninfo)->maxlsn;

	for (ret = __log_c_get(logc, &lsn, &data, DB_NEXT);
	    ret == 0; ret = __log_c_get(logc, &lsn, &data, DB_NEXT)) {
		if (log_compare(&lsn, &stop_lsn) > 0)
			break;
		if (dbenv->db_feedback != NULL) {
			progress = 67 + (int)(33 * (__lsn_diff(&first_lsn,
			    &last_lsn, &lsn, log_size, 1) / nfiles));
			dbenv->db_feedback(dbenv, DB_RECOVER, progress);
		}
		ret = __db_dispatch(dbenv, dbenv->recover_dtab,
		    dbenv->recover_dtab_size, &data, &lsn,
		    DB_TXN_FORWARD_ROLL, txninfo);
		if (ret != 0) {
			if (ret != DB_TXN_CKP)
				goto msgerr;
			else
				ret = 0;
		}
	}
	if (ret != 0 && ret != DB_NOTFOUND)
		goto err;

	if ((ret = __db_do_the_limbo(dbenv, NULL, NULL, txninfo,
	    dbenv->tx_timestamp != 0 ? LIMBO_TIMESTAMP : LIMBO_RECOVER)) != 0)
		goto err;

	if (max_lsn == NULL)
		region->last_txnid = ((DB_TXNHEAD *)txninfo)->maxid;

	if (dbenv->tx_timestamp != 0) {
		/*
		 * Timestamp recovery truncates the log, so the cursor is
		 * closed first.  The handle is dropped regardless of the
		 * close result so the exit path cannot close it again.
		 */
		if (logc != NULL) {
			ret = __log_c_close(logc);
			logc = NULL;
			if (ret != 0)
				goto err;
		}

		if ((ret = __memp_sync(dbenv, NULL)) != 0)
			goto err;
		region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
		/*
		 * NOTE(review): any status produced by __log_vtruncate is
		 * not examined here -- confirm whether it can fail.
		 */
		__log_vtruncate(dbenv, &((DB_TXNHEAD *)txninfo)->maxlsn,
		    &((DB_TXNHEAD *)txninfo)->ckplsn, trunclsn);

		if ((ret = __db_do_the_limbo(dbenv,
		    NULL, NULL, txninfo, LIMBO_COMPENSATE)) != 0)
			goto err;
	}

	/* Force a checkpoint, then close the files opened for recovery. */
	if ((ret = __txn_checkpoint(dbenv, 0, 0, DB_FORCE)) != 0)
		goto err;

	if ((ret = __dbreg_close_files(dbenv)) != 0)
		goto err;

done:
	if (max_lsn != NULL) {
		region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;

		/*
		 * Close the cursor before truncating and forget the handle
		 * immediately: if the close or the re-open below fails, the
		 * exit path must not see a stale cursor.
		 */
		if (logc != NULL) {
			ret = __log_c_close(logc);
			logc = NULL;
			if (ret != 0)
				goto err;
		}
		/* NOTE(review): result of __log_vtruncate is not examined. */
		__log_vtruncate(dbenv,
		    max_lsn, &((DB_TXNHEAD *)txninfo)->ckplsn, trunclsn);

		/*
		 * The log has been truncated: recompute the starting point
		 * and re-run the open-files pass over what remains.
		 */
		if ((ret = __log_cursor(dbenv, &logc)) != 0)
			goto err;
		if ((ret =
		    __log_c_get(logc, &first_lsn, &data, DB_FIRST)) != 0) {
			if (ret == DB_NOTFOUND)
				ret = 0;
			else
				__db_err(dbenv, "First log record not found");
			goto err;
		}
		if ((ret = __txn_getckp(dbenv, &first_lsn)) == 0 &&
		    (ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) == 0) {
			/*
			 * Release the checkpoint record read earlier, if
			 * any, before reading another into the same pointer;
			 * otherwise the first allocation is lost.
			 */
			if (ckp_args != NULL) {
				__os_free(dbenv, ckp_args);
				ckp_args = NULL;
			}
			if ((ret = __txn_ckp_read(dbenv,
			    data.data, &ckp_args)) != 0) {
				__db_err(dbenv,
			    "Invalid checkpoint record at [%lu][%lu]",
				    (u_long)first_lsn.file,
				    (u_long)first_lsn.offset);
				goto err;
			}
			first_lsn = ckp_args->ckp_lsn;
		}
		if ((ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0)
			goto err;
		if ((ret = __env_openfiles(dbenv, logc,
		    txninfo, &data, &first_lsn, NULL, nfiles, 1)) != 0)
			goto err;
	} else if (region->stat.st_nrestores == 0)
		/* No restored transactions recorded: reset the txn region. */
		if ((ret = __txn_reset(dbenv)) != 0)
			goto err;

	if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) {
		(void)time(&now);
		__db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
		__db_err(dbenv, "%s %lx %s [%lu][%lu]",
		    "Maximum transaction ID",
		    (u_long)(txninfo == NULL ?
			TXN_MINIMUM : ((DB_TXNHEAD *)txninfo)->maxid),
		    "Recovery checkpoint",
		    (u_long)region->last_ckp.file,
		    (u_long)region->last_ckp.offset);
	}

	/* Reached only through "goto msgerr" from the dispatch loops. */
	if (0) {
msgerr:		__db_err(dbenv,
		    "Recovery function for LSN %lu %lu failed on %s pass",
		    (u_long)lsn.file, (u_long)lsn.offset, pass);
	}

	/* Common exit: release resources and restore environment state. */
err:	if (logc != NULL && (t_ret = __log_c_close(logc)) != 0 && ret == 0)
		ret = t_ret;

	if (txninfo != NULL)
		__db_txnlist_end(dbenv, txninfo);

	if (dtab != NULL)
		__os_free(dbenv, dtab);

	if (ckp_args != NULL)
		__os_free(dbenv, ckp_args);

	dbenv->tx_timestamp = 0;

	if (is_thread)
		F_SET(dbenv, DB_ENV_THREAD);
	F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
	F_CLR(region, TXN_IN_RECOVERY);

	return (ret);
}
/*
 * __lsn_diff --
 *	Return the distance between two LSNs as a (fractional) number of log
 *	files, where one file is "max" bytes.
 *
 *	is_forward != 0: distance from low up to current (high is not read).
 *	is_forward == 0: distance from current up to high (low is not read).
 */
static double
__lsn_diff(low, high, current, max, is_forward)
	DB_LSN *low, *high, *current;
	u_int32_t max;
	int is_forward;
{
	DB_LSN *from, *to;

	/* Both directions measure "to - from"; only the endpoints differ. */
	if (is_forward) {
		from = low;
		to = current;
	} else {
		from = current;
		to = high;
	}

	/* Same file: the difference is purely an offset difference. */
	if (to->file == from->file)
		return ((double)(to->offset - from->offset) / max);

	/*
	 * Different files and the later offset is the smaller one: borrow
	 * one whole file so the fractional part stays non-negative.
	 */
	if (to->offset < from->offset)
		return ((double)(to->file - from->file - 1) +
		    (double)(max - from->offset + to->offset) / max);

	return ((double)(to->file - from->file) +
	    (double)(to->offset - from->offset) / max);
}
/*
 * __log_backup --
 *	Walk the chain of checkpoint records backward, starting at the
 *	checkpoint returned by __txn_getckp and following each record's
 *	last_ckp link, until one is found whose ckp_lsn is <= max_lsn.  That
 *	ckp_lsn is stored in *start_lsn.
 *
 *	If *start_lsn is still zero afterwards (the caller is expected to
 *	have zeroed it) and the walk ended with 0 or DB_NOTFOUND, *start_lsn
 *	is set to the first record in the log instead.
 *
 *	Returns 0 on success or an error code.  A failure to decode a
 *	checkpoint record is returned immediately, without the fallback.
 */
static int
__log_backup(dbenv, logc, max_lsn, start_lsn)
	DB_ENV *dbenv;
	DB_LOGC *logc;
	DB_LSN *max_lsn, *start_lsn;
{
	DB_LSN lsn;
	DBT data;
	__txn_ckp_args *ckp_args;
	int ret;

	memset(&data, 0, sizeof(data));
	ckp_args = NULL;

	if ((ret = __txn_getckp(dbenv, &lsn)) != 0)
		goto err;
	while ((ret = __log_c_get(logc, &lsn, &data, DB_SET)) == 0) {
		if ((ret = __txn_ckp_read(dbenv, data.data, &ckp_args)) != 0)
			return (ret);
		if (log_compare(&ckp_args->ckp_lsn, max_lsn) <= 0) {
			*start_lsn = ckp_args->ckp_lsn;
			break;
		}

		/* Step to the previous checkpoint; stop if there is none. */
		lsn = ckp_args->last_ckp;
		if (IS_ZERO_LSN(lsn))
			break;

		/*
		 * Clear the pointer after freeing: if the next log read
		 * fails, the loop exits and the cleanup below must not free
		 * this record a second time.
		 */
		__os_free(dbenv, ckp_args);
		ckp_args = NULL;
	}

	if (ckp_args != NULL)
		__os_free(dbenv, ckp_args);

	/* Nothing suitable found: fall back to the first log record. */
err:	if (IS_ZERO_LSN(*start_lsn) && (ret == 0 || ret == DB_NOTFOUND))
		ret = __log_c_get(logc, start_lsn, &data, DB_FIRST);
	return (ret);
}
/*
 * __log_earliest --
 *	Scan the log forward from its first record looking at checkpoint
 *	records (record type DB___txn_ckp).  For each one that can be
 *	decoded, store its ckp_lsn in *lowlsn and its timestamp in *lowtime;
 *	stop at the first whose ckp_lsn is not below the first log record's
 *	LSN.
 *
 *	Returns 0 when such a checkpoint was found, otherwise the status of
 *	the log read that ended the scan (e.g. DB_NOTFOUND at end of log).
 *	A checkpoint record that fails to decode is skipped.
 */
static int
__log_earliest(dbenv, logc, lowtime, lowlsn)
	DB_ENV *dbenv;
	DB_LOGC *logc;
	int32_t *lowtime;
	DB_LSN *lowlsn;
{
	DB_LSN first_lsn, lsn;
	DBT data;
	__txn_ckp_args *ckpargs;
	u_int32_t rectype;
	int cmp, ret;

	memset(&data, 0, sizeof(data));

	ret = __log_c_get(logc, &first_lsn, &data, DB_FIRST);
	while (ret == 0) {
		/* The record type is the first u_int32_t of the record. */
		memcpy(&rectype, data.data, sizeof(rectype));

		if (rectype == DB___txn_ckp &&
		    (ret = __txn_ckp_read(dbenv, data.data, &ckpargs)) == 0) {
			cmp = log_compare(&ckpargs->ckp_lsn, &first_lsn);
			*lowlsn = ckpargs->ckp_lsn;
			*lowtime = ckpargs->timestamp;
			__os_free(dbenv, ckpargs);

			/* ret is 0 here, so this exit reports success. */
			if (cmp >= 0)
				break;
		}

		ret = __log_c_get(logc, &lsn, &data, DB_NEXT);
	}

	return (ret);
}
int
__env_openfiles(dbenv, logc, txninfo,
data, open_lsn, last_lsn, nfiles, in_recovery)
DB_ENV *dbenv;
DB_LOGC *logc;
void *txninfo;
DBT *data;
DB_LSN *open_lsn, *last_lsn;
int in_recovery;
double nfiles;
{
DB_LSN lsn;
u_int32_t log_size;
int progress, ret;
log_size =
((LOG *)(((DB_LOG *)dbenv->lg_handle)->reginfo.primary))->log_size;
lsn = *open_lsn;
for (;;) {
if (in_recovery && dbenv->db_feedback != NULL) {
DB_ASSERT(last_lsn != NULL);
progress = (int)(33 * (__lsn_diff(open_lsn,
last_lsn, &lsn, log_size, 1) / nfiles));
dbenv->db_feedback(dbenv, DB_RECOVER, progress);
}
ret = __db_dispatch(dbenv,
dbenv->recover_dtab, dbenv->recover_dtab_size, data, &lsn,
in_recovery ? DB_TXN_OPENFILES : DB_TXN_POPENFILES,
txninfo);
if (ret != 0 && ret != DB_TXN_CKP) {
__db_err(dbenv,
"Recovery function for LSN %lu %lu failed",
(u_long)lsn.file, (u_long)lsn.offset);
break;
}
if ((ret = __log_c_get(logc, &lsn, data, DB_NEXT)) != 0) {
if (ret == DB_NOTFOUND)
ret = 0;
break;
}
}
return (ret);
}