/*- * See the file LICENSE for redistribution information. * * Copyright (c) 1998,2008 Oracle. All rights reserved. * * $Id: db_am.c,v 12.50 2008/02/18 19:11:59 bschmeck Exp $ */ #include "db_config.h" #include "db_int.h" #include "dbinc/db_page.h" #include "dbinc/btree.h" #include "dbinc/hash.h" #include "dbinc/lock.h" #include "dbinc/log.h" #include "dbinc/mp.h" #include "dbinc/qam.h" #include "dbinc/txn.h" static int __db_append_primary __P((DBC *, DBT *, DBT *)); static int __db_secondary_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); static int __dbc_set_priority __P((DBC *, DB_CACHE_PRIORITY)); static int __dbc_get_priority __P((DBC *, DB_CACHE_PRIORITY* )); /* * __db_cursor_int -- * Internal routine to create a cursor. * * PUBLIC: int __db_cursor_int __P((DB *, DB_THREAD_INFO *, * PUBLIC: DB_TXN *, DBTYPE, db_pgno_t, int, DB_LOCKER *, DBC **)); */ int __db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp) DB *dbp; DB_THREAD_INFO *ip; DB_TXN *txn; DBTYPE dbtype; db_pgno_t root; int flags; DB_LOCKER *locker; DBC **dbcp; { DBC *dbc; DBC_INTERNAL *cp; ENV *env; db_threadid_t tid; int allocated, ret; pid_t pid; env = dbp->env; allocated = 0; /* * If dbcp is non-NULL it is assumed to point to an area to initialize * as a cursor. * * Take one from the free list if it's available. Take only the * right type. With off page dups we may have different kinds * of cursors on the queue for a single database. */ MUTEX_LOCK(env, dbp->mutex); #ifndef HAVE_NO_DB_REFCOUNT /* * If this DBP is being logged then refcount the log filename * relative to this transaction. We do this here because we have * the dbp->mutex which protects the refcount. We want to avoid * calling the function if we are duplicating a cursor. This includes * the case of creating an off page duplicate cursor. If we know this * cursor will not be used in an update, we could avoid this, * but we don't have that information. */ if (txn != NULL && !LF_ISSET(DBC_OPD|DBC_DUPLICATE) && !F_ISSET(dbp, DB_AM_RECOVER) && dbp->log_filename != NULL && !IS_REP_CLIENT(env) && (ret = __txn_record_fname(env, txn, dbp->log_filename)) != 0) return (ret); #endif TAILQ_FOREACH(dbc, &dbp->free_queue, links) if (dbtype == dbc->dbtype) { TAILQ_REMOVE(&dbp->free_queue, dbc, links); F_CLR(dbc, ~DBC_OWN_LID); break; } MUTEX_UNLOCK(env, dbp->mutex); if (dbc == NULL) { if ((ret = __os_calloc(env, 1, sizeof(DBC), &dbc)) != 0) return (ret); allocated = 1; dbc->flags = 0; dbc->dbp = dbp; dbc->dbenv = dbp->dbenv; dbc->env = dbp->env; /* Set up locking information. */ if (LOCKING_ON(env)) { /* * If we are not threaded, we share a locker ID among * all cursors opened in the environment handle, * allocating one if this is the first cursor. * * This relies on the fact that non-threaded DB handles * always have non-threaded environment handles, since * we set DB_THREAD on DB handles created with threaded * environment handles. */ if (!DB_IS_THREADED(dbp)) { if (env->env_lref == NULL && (ret = __lock_id(env, NULL, &env->env_lref)) != 0) goto err; dbc->lref = env->env_lref; } else { if ((ret = __lock_id(env, NULL, &dbc->lref)) != 0) goto err; F_SET(dbc, DBC_OWN_LID); } /* * In CDB, secondary indices should share a lock file * ID with the primary; otherwise we're susceptible * to deadlocks. We also use __db_cursor_int rather * than __db_cursor to create secondary update cursors * in c_put and c_del; these won't acquire a new lock. * * !!! * Since this is in the one-time cursor allocation * code, we need to be sure to destroy, not just * close, all cursors in the secondary when we * associate. */ if (CDB_LOCKING(env) && F_ISSET(dbp, DB_AM_SECONDARY)) memcpy(dbc->lock.fileid, dbp->s_primary->fileid, DB_FILE_ID_LEN); else memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN); if (CDB_LOCKING(env)) { if (F_ISSET(env->dbenv, DB_ENV_CDB_ALLDB)) { /* * If we are doing a single lock per * environment, set up the global * lock object just like we do to * single thread creates. */ DB_ASSERT(env, sizeof(db_pgno_t) == sizeof(u_int32_t)); dbc->lock_dbt.size = sizeof(u_int32_t); dbc->lock_dbt.data = &dbc->lock.pgno; dbc->lock.pgno = 0; } else { dbc->lock_dbt.size = DB_FILE_ID_LEN; dbc->lock_dbt.data = dbc->lock.fileid; } } else { dbc->lock.type = DB_PAGE_LOCK; dbc->lock_dbt.size = sizeof(dbc->lock); dbc->lock_dbt.data = &dbc->lock; } } /* Init the DBC internal structure. */ switch (dbtype) { case DB_BTREE: case DB_RECNO: if ((ret = __bamc_init(dbc, dbtype)) != 0) goto err; break; case DB_HASH: if ((ret = __hamc_init(dbc)) != 0) goto err; break; case DB_QUEUE: if ((ret = __qamc_init(dbc)) != 0) goto err; break; case DB_UNKNOWN: default: ret = __db_unknown_type(env, "DB->cursor", dbtype); goto err; } cp = dbc->internal; } /* Refresh the DBC structure. */ dbc->dbtype = dbtype; RESET_RET_MEM(dbc); dbc->set_priority = __dbc_set_priority; dbc->get_priority = __dbc_get_priority; dbc->priority = dbp->priority; if ((dbc->txn = txn) != NULL) dbc->locker = txn->locker; else if (LOCKING_ON(env)) { /* * There are certain cases in which we want to create a * new cursor with a particular locker ID that is known * to be the same as (and thus not conflict with) an * open cursor. * * The most obvious case is cursor duplication; when we * call DBC->dup or __dbc_idup, we want to use the original * cursor's locker ID. * * Another case is when updating secondary indices. Standard * CDB locking would mean that we might block ourself: we need * to open an update cursor in the secondary while an update * cursor in the primary is open, and when the secondary and * primary are subdatabases or we're using env-wide locking, * this is disastrous. * * In these cases, our caller will pass a nonzero locker * ID into this function. Use this locker ID instead of * the default as the locker ID for our new cursor. */ if (locker != NULL) dbc->locker = locker; else { /* * If we are threaded then we need to set the * proper thread id into the locker. */ if (DB_IS_THREADED(dbp)) { env->dbenv->thread_id(env->dbenv, &pid, &tid); __lock_set_thread_id(dbc->lref, pid, tid); } dbc->locker = dbc->lref; } } /* * These fields change when we are used as a secondary index, so * if the DB is a secondary, make sure they're set properly just * in case we opened some cursors before we were associated. * * __dbc_get is used by all access methods, so this should be safe. */ if (F_ISSET(dbp, DB_AM_SECONDARY)) dbc->get = dbc->c_get = __dbc_secondary_get_pp; if (LF_ISSET(DBC_OPD)) F_SET(dbc, DBC_OPD); if (F_ISSET(dbp, DB_AM_RECOVER)) F_SET(dbc, DBC_RECOVER); if (F_ISSET(dbp, DB_AM_COMPENSATE)) F_SET(dbc, DBC_DONTLOCK); /* Refresh the DBC internal structure. */ cp = dbc->internal; cp->opd = NULL; cp->indx = 0; cp->page = NULL; cp->pgno = PGNO_INVALID; cp->root = root; switch (dbtype) { case DB_BTREE: case DB_RECNO: if ((ret = __bamc_refresh(dbc)) != 0) goto err; break; case DB_HASH: case DB_QUEUE: break; case DB_UNKNOWN: default: ret = __db_unknown_type(env, "DB->cursor", dbp->type); goto err; } /* * The transaction keeps track of how many cursors were opened within * it to catch application errors where the cursor isn't closed when * the transaction is resolved. */ if (txn != NULL) ++txn->cursors; if (ip != NULL) dbc->thread_info = ip; else if (txn != NULL) dbc->thread_info = txn->thread_info; else ENV_GET_THREAD_INFO(env, dbc->thread_info); MUTEX_LOCK(env, dbp->mutex); TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links); F_SET(dbc, DBC_ACTIVE); MUTEX_UNLOCK(env, dbp->mutex); *dbcp = dbc; return (0); err: if (allocated) __os_free(env, dbc); return (ret); } /* * __db_put -- * Store a key/data pair. * * PUBLIC: int __db_put __P((DB *, * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t)); */ int __db_put(dbp, ip, txn, key, data, flags) DB *dbp; DB_THREAD_INFO *ip; DB_TXN *txn; DBT *key, *data; u_int32_t flags; { DBC *dbc; DBT tdata; ENV *env; int ret, t_ret; env = dbp->env; if ((ret = __db_cursor(dbp, ip, txn, &dbc, DB_WRITELOCK)) != 0) return (ret); DEBUG_LWRITE(dbc, txn, "DB->put", key, data, flags); SET_RET_MEM(dbc, dbp); /* * See the comment in __db_get(). * * Note that the c_get in the DB_NOOVERWRITE case is safe to * do with this flag set; if it errors in any way other than * DB_NOTFOUND, we're going to close the cursor without doing * anything else, and if it returns DB_NOTFOUND then it's safe * to do a c_put(DB_KEYLAST) even if an access method moved the * cursor, since that's not position-dependent. */ F_SET(dbc, DBC_TRANSIENT); switch (flags) { case DB_APPEND: /* * If there is an append callback, the value stored in * data->data may be replaced and then freed. To avoid * passing a freed pointer back to the user, just operate * on a copy of the data DBT. */ tdata = *data; /* * Append isn't a normal put operation; call the appropriate * access method's append function. */ switch (dbp->type) { case DB_QUEUE: if ((ret = __qam_append(dbc, key, &tdata)) != 0) goto err; break; case DB_RECNO: if ((ret = __ram_append(dbc, key, &tdata)) != 0) goto err; break; case DB_BTREE: case DB_HASH: case DB_UNKNOWN: default: /* The interface should prevent this. */ DB_ASSERT(env, dbp->type == DB_QUEUE || dbp->type == DB_RECNO); ret = __db_ferr(env, "DB->put", 0); goto err; } /* * Secondary indices: since we've returned zero from an append * function, we've just put a record, and done so outside * __dbc_put. We know we're not a secondary-- the interface * prevents puts on them--but we may be a primary. If so, * update our secondary indices appropriately. * * If the application is managing this key's data, we need a * copy of it here. It will be freed in __db_put_pp. */ DB_ASSERT(env, !F_ISSET(dbp, DB_AM_SECONDARY)); if (LIST_FIRST(&dbp->s_secondaries) != NULL && (ret = __dbt_usercopy(env, key)) == 0) ret = __db_append_primary(dbc, key, &tdata); /* * The append callback, if one exists, may have allocated * a new tdata.data buffer. If so, free it. */ FREE_IF_NEEDED(env, &tdata); /* No need for a cursor put; we're done. */ goto done; default: /* Fall through to normal cursor put. */ break; } if (ret == 0) ret = __dbc_put(dbc, key, data, flags == 0 ? DB_KEYLAST : flags); err: done: /* Close the cursor. */ if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) ret = t_ret; return (ret); } /* * __db_del -- * Delete the items referenced by a key. * * PUBLIC: int __db_del __P((DB *, * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, u_int32_t)); */ int __db_del(dbp, ip, txn, key, flags) DB *dbp; DB_THREAD_INFO *ip; DB_TXN *txn; DBT *key; u_int32_t flags; { DBC *dbc; DBT data; u_int32_t f_init, f_next; int ret, t_ret; /* Allocate a cursor. */ if ((ret = __db_cursor(dbp, ip, txn, &dbc, DB_WRITELOCK)) != 0) goto err; DEBUG_LWRITE(dbc, txn, "DB->del", key, NULL, flags); COMPQUIET(flags, 0); /* * Walk a cursor through the key/data pairs, deleting as we go. Set * the DB_DBT_USERMEM flag, as this might be a threaded application * and the flags checking will catch us. We don't actually want the * keys or data, set DB_DBT_ISSET. We rely on __dbc_get to clear * this. */ memset(&data, 0, sizeof(data)); F_SET(&data, DB_DBT_USERMEM | DB_DBT_ISSET); F_SET(key, DB_DBT_ISSET); /* * If locking (and we haven't already acquired CDB locks), set the * read-modify-write flag. */ f_init = DB_SET; f_next = DB_NEXT_DUP; if (STD_LOCKING(dbc)) { f_init |= DB_RMW; f_next |= DB_RMW; } /* * Optimize the simple cases. For all AMs if we don't have secondaries * and are not a secondary and we aren't a foreign database and there * are no dups then we can avoid a bunch of overhead. For queue we * don't need to fetch the record since we delete by direct calculation * from the record number. * * Hash permits an optimization in DB->del: since on-page duplicates are * stored in a single HKEYDATA structure, it's possible to delete an * entire set of them at once, and as the HKEYDATA has to be rebuilt * and re-put each time it changes, this is much faster than deleting * the duplicates one by one. Thus, if not pointing at an off-page * duplicate set, and we're not using secondary indices (in which case * we'd have to examine the items one by one anyway), let hash do this * "quick delete". * * !!! * Note that this is the only application-executed delete call in * Berkeley DB that does not go through the __dbc_del function. * If anything other than the delete itself (like a secondary index * update) has to happen there in a particular situation, the * conditions here should be modified not to use these optimizations. * The ordinary AM-independent alternative will work just fine; * it'll just be slower. */ if (!F_ISSET(dbp, DB_AM_SECONDARY) && LIST_FIRST(&dbp->s_secondaries) == NULL && LIST_FIRST(&dbp->f_primaries) == NULL) { #ifdef HAVE_QUEUE if (dbp->type == DB_QUEUE) { ret = __qam_delete(dbc, key); F_CLR(key, DB_DBT_ISSET); goto done; } #endif /* Fetch the first record. */ if ((ret = __dbc_get(dbc, key, &data, f_init)) != 0) goto err; #ifdef HAVE_HASH if (dbp->type == DB_HASH && dbc->internal->opd == NULL) { ret = __ham_quick_delete(dbc); goto done; } #endif if ((dbp->type == DB_BTREE || dbp->type == DB_RECNO) && !F_ISSET(dbp, DB_AM_DUP)) { ret = dbc->am_del(dbc); goto done; } } else if ((ret = __dbc_get(dbc, key, &data, f_init)) != 0) goto err; /* Walk through the set of key/data pairs, deleting as we go. */ for (;;) { if ((ret = __dbc_del(dbc, 0)) != 0) break; F_SET(key, DB_DBT_ISSET); F_SET(&data, DB_DBT_ISSET); if ((ret = __dbc_get(dbc, key, &data, f_next)) != 0) { if (ret == DB_NOTFOUND) ret = 0; break; } } done: err: /* Discard the cursor. */ if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) ret = t_ret; return (ret); } /* * __db_sync -- * Flush the database cache. * * PUBLIC: int __db_sync __P((DB *)); */ int __db_sync(dbp) DB *dbp; { int ret, t_ret; ret = 0; /* If the database was read-only, we're done. */ if (F_ISSET(dbp, DB_AM_RDONLY)) return (0); /* If it's a Recno tree, write the backing source text file. */ if (dbp->type == DB_RECNO) ret = __ram_writeback(dbp); /* If the database was never backed by a database file, we're done. */ if (F_ISSET(dbp, DB_AM_INMEM)) return (ret); if (dbp->type == DB_QUEUE) ret = __qam_sync(dbp); else /* Flush any dirty pages from the cache to the backing file. */ if ((t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0) ret = t_ret; return (ret); } /* * __db_associate -- * Associate another database as a secondary index to this one. * * PUBLIC: int __db_associate __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB *, * PUBLIC: int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t)); */ int __db_associate(dbp, ip, txn, sdbp, callback, flags) DB *dbp, *sdbp; DB_THREAD_INFO *ip; DB_TXN *txn; int (*callback) __P((DB *, const DBT *, const DBT *, DBT *)); u_int32_t flags; { DBC *pdbc, *sdbc; DBT key, data, skey, *tskeyp; ENV *env; int build, ret, t_ret; u_int32_t nskey; env = dbp->env; pdbc = sdbc = NULL; ret = 0; memset(&skey, 0, sizeof(DBT)); nskey = 0; tskeyp = NULL; /* * Check to see if the secondary is empty -- and thus if we should * build it -- before we link it in and risk making it show up in other * threads. Do this first so that the databases remain unassociated on * error. */ build = 0; if (LF_ISSET(DB_CREATE)) { if ((ret = __db_cursor(sdbp, ip, txn, &sdbc, 0)) != 0) goto err; /* * We don't care about key or data; we're just doing * an existence check. */ memset(&key, 0, sizeof(DBT)); memset(&data, 0, sizeof(DBT)); F_SET(&key, DB_DBT_PARTIAL | DB_DBT_USERMEM); F_SET(&data, DB_DBT_PARTIAL | DB_DBT_USERMEM); if ((ret = __dbc_get(sdbc, &key, &data, (STD_LOCKING(sdbc) ? DB_RMW : 0) | DB_FIRST)) == DB_NOTFOUND) { build = 1; ret = 0; } if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0) ret = t_ret; /* Reset for later error check. */ sdbc = NULL; if (ret != 0) goto err; } /* * Set up the database handle as a secondary. */ sdbp->s_callback = callback; sdbp->s_primary = dbp; sdbp->stored_get = sdbp->get; sdbp->get = __db_secondary_get; sdbp->stored_close = sdbp->close; sdbp->close = __db_secondary_close_pp; F_SET(sdbp, DB_AM_SECONDARY); if (LF_ISSET(DB_IMMUTABLE_KEY)) FLD_SET(sdbp->s_assoc_flags, DB_ASSOC_IMMUTABLE_KEY); /* * Add the secondary to the list on the primary. Do it here * so that we see any updates that occur while we're walking * the primary. */ MUTEX_LOCK(env, dbp->mutex); /* See __db_s_next for an explanation of secondary refcounting. */ DB_ASSERT(env, sdbp->s_refcnt == 0); sdbp->s_refcnt = 1; LIST_INSERT_HEAD(&dbp->s_secondaries, sdbp, s_links); MUTEX_UNLOCK(env, dbp->mutex); if (build) { /* * We loop through the primary, putting each item we * find into the new secondary. * * If we're using CDB, opening these two cursors puts us * in a bit of a locking tangle: CDB locks are done on the * primary, so that we stay deadlock-free, but that means * that updating the secondary while we have a read cursor * open on the primary will self-block. To get around this, * we force the primary cursor to use the same locker ID * as the secondary, so they won't conflict. This should * be harmless even if we're not using CDB. */ if ((ret = __db_cursor(sdbp, ip, txn, &sdbc, CDB_LOCKING(sdbp->env) ? DB_WRITECURSOR : 0)) != 0) goto err; if ((ret = __db_cursor_int(dbp, ip, txn, dbp->type, PGNO_INVALID, 0, sdbc->locker, &pdbc)) != 0) goto err; /* Lock out other threads, now that we have a locker. */ dbp->associate_locker = sdbc->locker; memset(&key, 0, sizeof(DBT)); memset(&data, 0, sizeof(DBT)); while ((ret = __dbc_get(pdbc, &key, &data, DB_NEXT)) == 0) { if ((ret = callback(sdbp, &key, &data, &skey)) != 0) { if (ret == DB_DONOTINDEX) continue; goto err; } if (F_ISSET(&skey, DB_DBT_MULTIPLE)) { #ifdef DIAGNOSTIC __db_check_skeyset(sdbp, &skey); #endif nskey = skey.size; tskeyp = (DBT *)skey.data; } else { nskey = 1; tskeyp = &skey; } SWAP_IF_NEEDED(sdbp, &key); for (; nskey > 0; nskey--, tskeyp++) { if ((ret = __dbc_put(sdbc, tskeyp, &key, DB_UPDATE_SECONDARY)) != 0) goto err; FREE_IF_NEEDED(env, tskeyp); } SWAP_IF_NEEDED(sdbp, &key); FREE_IF_NEEDED(env, &skey); } if (ret == DB_NOTFOUND) ret = 0; } err: if (sdbc != NULL && (t_ret = __dbc_close(sdbc)) != 0 && ret == 0) ret = t_ret; if (pdbc != NULL && (t_ret = __dbc_close(pdbc)) != 0 && ret == 0) ret = t_ret; dbp->associate_locker = NULL; for (; nskey > 0; nskey--, tskeyp++) FREE_IF_NEEDED(env, tskeyp); FREE_IF_NEEDED(env, &skey); return (ret); } /* * __db_secondary_get -- * This wrapper function for DB->pget() is the DB->get() function * on a database which has been made into a secondary index. */ static int __db_secondary_get(sdbp, txn, skey, data, flags) DB *sdbp; DB_TXN *txn; DBT *skey, *data; u_int32_t flags; { DB_ASSERT(sdbp->env, F_ISSET(sdbp, DB_AM_SECONDARY)); return (__db_pget_pp(sdbp, txn, skey, NULL, data, flags)); } /* * __db_secondary_close -- * Wrapper function for DB->close() which we use on secondaries to * manage refcounting and make sure we don't close them underneath * a primary that is updating. * * PUBLIC: int __db_secondary_close __P((DB *, u_int32_t)); */ int __db_secondary_close(sdbp, flags) DB *sdbp; u_int32_t flags; { DB *primary; ENV *env; int doclose; doclose = 0; primary = sdbp->s_primary; env = primary->env; MUTEX_LOCK(env, primary->mutex); /* * Check the refcount--if it was at 1 when we were called, no * thread is currently updating this secondary through the primary, * so it's safe to close it for real. * * If it's not safe to do the close now, we do nothing; the * database will actually be closed when the refcount is decremented, * which can happen in either __db_s_next or __db_s_done. */ DB_ASSERT(env, sdbp->s_refcnt != 0); if (--sdbp->s_refcnt == 0) { LIST_REMOVE(sdbp, s_links); /* We don't want to call close while the mutex is held. */ doclose = 1; } MUTEX_UNLOCK(env, primary->mutex); /* * sdbp->close is this function; call the real one explicitly if * need be. */ return (doclose ? __db_close(sdbp, NULL, flags) : 0); } /* * __db_append_primary -- * Perform the secondary index updates necessary to put(DB_APPEND) * a record to a primary database. */ static int __db_append_primary(dbc, key, data) DBC *dbc; DBT *key, *data; { DB *dbp, *sdbp; DBC *fdbc, *sdbc, *pdbc; DBT fdata, oldpkey, pkey, pdata, skey; ENV *env; int cmp, ret, t_ret; dbp = dbc->dbp; env = dbp->env; sdbp = NULL; ret = 0; /* * Worrying about partial appends seems a little like worrying * about Linear A character encodings. But we support those * too if your application understands them. */ pdbc = NULL; if (F_ISSET(data, DB_DBT_PARTIAL) || F_ISSET(key, DB_DBT_PARTIAL)) { /* * The dbc we were passed is all set to pass things * back to the user; we can't safely do a call on it. * Dup the cursor, grab the real data item (we don't * care what the key is--we've been passed it directly), * and use that instead of the data DBT we were passed. * * Note that we can get away with this simple get because * an appended item is by definition new, and the * correctly-constructed full data item from this partial * put is on the page waiting for us. */ if ((ret = __dbc_idup(dbc, &pdbc, DB_POSITION)) != 0) return (ret); memset(&pkey, 0, sizeof(DBT)); memset(&pdata, 0, sizeof(DBT)); if ((ret = __dbc_get(pdbc, &pkey, &pdata, DB_CURRENT)) != 0) goto err; key = &pkey; data = &pdata; } /* * Loop through the secondary indices, putting a new item in * each that points to the appended item. * * This is much like the loop in "step 3" in __dbc_put, so * I'm not commenting heavily here; it was unclean to excerpt * just that section into a common function, but the basic * overview is the same here. */ if ((ret = __db_s_first(dbp, &sdbp)) != 0) goto err; for (; sdbp != NULL && ret == 0; ret = __db_s_next(&sdbp, dbc->txn)) { memset(&skey, 0, sizeof(DBT)); if ((ret = sdbp->s_callback(sdbp, key, data, &skey)) != 0) { if (ret == DB_DONOTINDEX) continue; goto err; } /* * If this secondary index is associated with a foreign * database, check that the foreign db contains this key to * maintain referential integrity. Set flags in fdata to avoid * mem copying, we just need to know existence. */ memset(&fdata, 0, sizeof(DBT)); F_SET(&fdata, DB_DBT_PARTIAL | DB_DBT_USERMEM); if (sdbp->s_foreign != NULL) { if ((ret = __db_cursor_int(sdbp->s_foreign, dbc->thread_info, dbc->txn, sdbp->s_foreign->type, PGNO_INVALID, 0, dbc->locker, &fdbc)) != 0) goto err; if ((ret = __dbc_get(fdbc, &skey, &fdata, DB_SET | (STD_LOCKING(dbc) ? DB_RMW : 0))) != 0) { if (ret == DB_NOTFOUND || ret == DB_KEYEMPTY) ret = DB_FOREIGN_CONFLICT; goto err; } if ((ret = __dbc_close(fdbc)) != 0) goto err; } if ((ret = __db_cursor_int(sdbp, dbc->thread_info, dbc->txn, sdbp->type, PGNO_INVALID, 0, dbc->locker, &sdbc)) != 0) { FREE_IF_NEEDED(env, &skey); goto err; } if (CDB_LOCKING(env)) { DB_ASSERT(env, sdbc->mylock.off == LOCK_INVALID); F_SET(sdbc, DBC_WRITER); } /* * Since we know we have a new primary key, it can't be a * duplicate duplicate in the secondary. It can be a * duplicate in a secondary that doesn't support duplicates, * however, so we need to be careful to avoid an overwrite * (which would corrupt our index). */ if (!F_ISSET(sdbp, DB_AM_DUP)) { memset(&oldpkey, 0, sizeof(DBT)); F_SET(&oldpkey, DB_DBT_MALLOC); ret = __dbc_get(sdbc, &skey, &oldpkey, DB_SET | (STD_LOCKING(dbc) ? DB_RMW : 0)); if (ret == 0) { cmp = __bam_defcmp(sdbp, &oldpkey, key); /* * XXX * This needs to use the right free function * as soon as this is possible. */ __os_ufree(env, oldpkey.data); if (cmp != 0) { __db_errx(env, "%s%s", "Append results in a non-unique secondary key in", " an index not configured to support duplicates"); ret = EINVAL; goto err1; } } else if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY) goto err1; } ret = __dbc_put(sdbc, &skey, key, DB_UPDATE_SECONDARY); err1: FREE_IF_NEEDED(env, &skey); if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0) ret = t_ret; if (ret != 0) goto err; } err: if (pdbc != NULL && (t_ret = __dbc_close(pdbc)) != 0 && ret == 0) ret = t_ret; if (sdbp != NULL && (t_ret = __db_s_done(sdbp, dbc->txn)) != 0 && ret == 0) ret = t_ret; return (ret); } /* * __db_associate_foreign -- * Associate this database (fdbp) as a foreign constraint to another * database (pdbp). That is, dbp's keys appear as foreign key values in * pdbp. * * PUBLIC: int __db_associate_foreign __P((DB *, DB *, * PUBLIC: int (*)(DB *, const DBT *, DBT *, const DBT *, int *), * PUBLIC: u_int32_t)); */ int __db_associate_foreign(fdbp, pdbp, callback, flags) DB *fdbp, *pdbp; int (*callback)(DB *, const DBT *, DBT *, const DBT *, int *); u_int32_t flags; { DB_FOREIGN_INFO *f_info; ENV *env; int ret; env = fdbp->env; ret = 0; if ((ret = __os_malloc(env, sizeof(DB_FOREIGN_INFO), &f_info)) != 0) { return ret; } memset(f_info, 0, sizeof(DB_FOREIGN_INFO)); f_info->dbp = pdbp; f_info->callback = callback; /* * It might be wise to filter this, but for now the flags only * set the delete action type. */ FLD_SET(f_info->flags, flags); /* * Add f_info to the foreign database's list of primaries. That is to * say, fdbp->f_primaries lists all databases for which fdbp is a * foreign constraint. */ MUTEX_LOCK(env, fdbp->mutex); LIST_INSERT_HEAD(&fdbp->f_primaries, f_info, f_links); MUTEX_UNLOCK(env, fdbp->mutex); /* * Associate fdbp as pdbp's foreign db, for referential integrity * checks. We don't allow the foreign db to be changed, because we * currently have no way of removing pdbp from the old foreign db's list * of primaries. */ if (pdbp->s_foreign != NULL) return (EINVAL); pdbp->s_foreign = fdbp; return (ret); } static int __dbc_set_priority(dbc, priority) DBC *dbc; DB_CACHE_PRIORITY priority; { dbc->priority = priority; return (0); } static int __dbc_get_priority(dbc, priority) DBC *dbc; DB_CACHE_PRIORITY *priority; { *priority = dbc->priority; return (0); }