diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/db/hash.c')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/db/hash.c | 1912 |
1 files changed, 1912 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/db/hash.c b/debian/htdig/htdig-3.2.0b6/db/hash.c new file mode 100644 index 00000000..43612dc4 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/db/hash.c @@ -0,0 +1,1912 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#ifndef lint +static const char sccsid[] = "@(#)hash.c 11.29 (Sleepycat) 11/14/99"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_am.h" +#include "db_ext.h" +#include "db_shash.h" +#include "db_swap.h" +#include "hash.h" +#include "btree.h" +#include "log.h" +#include "lock.h" +#include "txn.h" + +static int CDB___ham_c_close __P((DBC *)); +static int CDB___ham_c_del __P((DBC *, u_int32_t)); +static int CDB___ham_c_destroy __P((DBC *)); +static int CDB___ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t)); +static int CDB___ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t)); +static int CDB___ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); +static int CDB___ham_dup_return __P((DBC *, DBT *, u_int32_t)); +static int CDB___ham_expand_table __P((DBC *)); +static int CDB___ham_init_htab __P((DBC *, + const char *, db_pgno_t, u_int32_t, u_int32_t)); +static int CDB___ham_lookup __P((DBC *, const DBT *, u_int32_t, db_lockmode_t)); +static int CDB___ham_overwrite __P((DBC *, DBT *)); + +/* + * CDB___ham_metachk -- + * + * PUBLIC: int CDB___ham_metachk __P((DB *, const char *, HMETA *)); + */ +int +CDB___ham_metachk(dbp, name, hashm) + DB *dbp; + const char *name; + HMETA *hashm; +{ + DB_ENV *dbenv; + u_int32_t vers; + int ret; + + dbenv = dbp->dbenv; + + /* + 
* At this point, all we know is that the magic number is for a Hash. + * Check the version, the database may be out of date. + */ + vers = hashm->dbmeta.version; + if (F_ISSET(dbp, DB_AM_SWAP)) + M_32_SWAP(vers); + switch (vers) { + case 4: + /* FALLTHROUGH */ + case 5: + CDB___db_err(dbenv, + "%s: hash version %lu requires a version upgrade", + name, (u_long)vers); + return (DB_OLD_VERSION); + case 6: + break; + default: + CDB___db_err(dbenv, + "%s: unsupported hash version: %lu", name, (u_long)vers); + return (EINVAL); + } + + /* Swap the page if we need to. */ + if (F_ISSET(dbp, DB_AM_SWAP) && (ret = CDB___ham_mswap((PAGE *)hashm)) != 0) + return (ret); + + /* Check the type. */ + if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN) + return (EINVAL); + dbp->type = DB_HASH; + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + /* + * Check application info against metadata info, and set info, flags, + * and type based on metadata info. + */ + if ((ret = CDB___db_fchk(dbenv, + "DB->open", hashm->dbmeta.flags, + DB_HASH_DUP | DB_HASH_SUBDB)) != 0) + return (ret); + + if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP)) + F_SET(&hashm->dbmeta, DB_HASH_DUP); + else + if (F_ISSET(dbp, DB_AM_DUP)) { + CDB___db_err(dbenv, + "%s: DB_DUP specified to open method but not set in database", + name); + return (EINVAL); + } + + if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB)) + F_SET(dbp, DB_AM_SUBDB); + else + if (F_ISSET(dbp, DB_AM_SUBDB)) { + CDB___db_err(dbenv, + "%s: subdatabase specified but not supported in database", + name); + return (EINVAL); + } + + /* Set the page size. */ + dbp->pgsize = hashm->dbmeta.pagesize; + F_CLR(dbp, DB_AM_PGDEF); + + /* Copy the file's ID. 
*/ + memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN); + + return (0); +} + +/* + * CDB___ham_open -- + * + * PUBLIC: int CDB___ham_open __P((DB *, const char *, db_pgno_t)); + */ +int +CDB___ham_open(dbp, name, base_pgno) + DB *dbp; + const char *name; + db_pgno_t base_pgno; +{ + DB_ENV *dbenv; + DBC *dbc; + HASH_CURSOR *hcp; + HASH *hashp; + int need_sync, ret, t_ret; + + dbc = NULL; + dbenv = dbp->dbenv; + need_sync = 0; + + /* Initialize the remaining fields/methods of the DB. */ + dbp->del = CDB___ham_delete; + dbp->stat = CDB___ham_stat; + + /* Get a cursor we can use for the rest of this function. */ + if ((ret = dbp->cursor(dbp, dbp->open_txn, &dbc, 0)) != 0) + return (ret); + + hcp = (HASH_CURSOR *)dbc->internal; + hashp = dbp->h_internal; + hashp->meta_pgno = base_pgno; + if ((ret = CDB___ham_get_meta(dbc)) != 0) + goto err1; + + /* + * If this is a new file, initialize it, and put it back dirty. + * + * Initialize the hdr structure. + */ + if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) { + /* File exists, verify the data in the header. */ + if (hashp->h_hash == NULL) + hashp->h_hash = hcp->hdr->dbmeta.version < 5 + ? CDB___ham_func4 : CDB___ham_func5; + if (hashp->h_hash(CHARKEY, sizeof(CHARKEY)) != + hcp->hdr->h_charkey) { + CDB___db_err(dbp->dbenv, + "hash: incompatible hash function"); + ret = EINVAL; + goto err2; + } + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP)) + F_SET(dbp, DB_AM_DUP); + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB)) + F_SET(dbp, DB_AM_SUBDB); + } else { + /* + * File does not exist, we must initialize the header. If + * locking is enabled that means getting a write lock first. + */ + dbc->lock.pgno = base_pgno; + + if (F_ISSET(dbenv, DB_ENV_LOCKING) && + ((ret = CDB_lock_put(dbenv, &hcp->hlock)) != 0 || + (ret = CDB_lock_get(dbenv, dbc->locker, + DB_NONBLOCK(dbc) ? 
DB_LOCK_NOWAIT : 0, + &dbc->lock_dbt, DB_LOCK_WRITE, &hcp->hlock)) != 0)) + goto err2; + + if ((ret = CDB___ham_init_htab(dbc, name, + base_pgno, hashp->h_nelem, hashp->h_ffactor)) != 0) + goto err2; + + need_sync = 1; + } + + /* Make sure we always have a valid hashp->h_hash function. */ + if (hashp->h_hash == NULL) + hashp->h_hash = hcp->hdr->dbmeta.version < 5 + ? CDB___ham_func4 : CDB___ham_func5; + +err2: /* Release the meta data page */ + if ((t_ret = CDB___ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; +err1: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + /* Sync the file so that we know that the meta data goes to disk. */ + if (ret == 0 && need_sync) + ret = dbp->sync(dbp, 0); +#if CONFIG_TEST + if (ret == 0) + DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name); + +DB_TEST_RECOVERY_LABEL +#endif + if (ret != 0) + (void)CDB___ham_db_close(dbp); + + return (ret); +} + +/************************** LOCAL CREATION ROUTINES **********************/ +/* + * Returns 0 on No Error + */ +static int +CDB___ham_init_htab(dbc, name, pgno, nelem, ffactor) + DBC *dbc; + const char *name; + db_pgno_t pgno; + u_int32_t nelem, ffactor; +{ + DB *dbp; + DB_LOCK metalock; + DB_LSN orig_lsn; + DBMETA *mmeta; + HASH_CURSOR *hcp; + HASH *hashp; + PAGE *h; + db_pgno_t mpgno; + int32_t l2, nbuckets; + int dirty_mmeta, i, ret, t_ret; + + hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; + hashp = dbp->h_internal; + mmeta = NULL; + dirty_mmeta = 0; + metalock.off = LOCK_INVALID; + + if (hashp->h_hash == NULL) + hashp->h_hash = DB_HASHVERSION < 5 ? CDB___ham_func4 : CDB___ham_func5; + + if (nelem != 0 && ffactor != 0) { + nelem = (nelem - 1) / ffactor + 1; + l2 = CDB___db_log2(nelem > 2 ? 
nelem : 2); + } else + l2 = 1; + nbuckets = 1 << l2; + + orig_lsn = hcp->hdr->dbmeta.lsn; + memset(hcp->hdr, 0, sizeof(HMETA)); + ZERO_LSN(hcp->hdr->dbmeta.lsn); + hcp->hdr->dbmeta.pgno = pgno; + hcp->hdr->dbmeta.magic = DB_HASHMAGIC; + hcp->hdr->dbmeta.version = DB_HASHVERSION; + hcp->hdr->dbmeta.pagesize = dbp->pgsize; + hcp->hdr->dbmeta.type = P_HASHMETA; + hcp->hdr->dbmeta.free = PGNO_INVALID; + hcp->hdr->max_bucket = hcp->hdr->high_mask = nbuckets - 1; + hcp->hdr->low_mask = (nbuckets >> 1) - 1; + hcp->hdr->ffactor = ffactor; + hcp->hdr->h_charkey = hashp->h_hash(CHARKEY, sizeof(CHARKEY)); + memcpy(hcp->hdr->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + + if (F_ISSET(dbp, DB_AM_DUP)) + F_SET(&hcp->hdr->dbmeta, DB_HASH_DUP); + if (F_ISSET(dbp, DB_AM_SUBDB)) { + F_SET(&hcp->hdr->dbmeta, DB_HASH_SUBDB); + + /* + * If this is a subdatabase, then we need to get the LSN + * off the master meta data page because that's where free + * pages are linked and during recovery we need to access + * that page and roll it backward/forward correctly with + * respect to LSN. + */ + mpgno = PGNO_BASE_MD; + if ((ret = CDB___db_lget(dbc, + 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + return (ret); + if ((ret = CDB_memp_fget(dbp->mpf, + &mpgno, 0, (PAGE **)&mmeta)) != 0) + goto err; + } + if ((ret = CDB___ham_dirty_page(dbp, (PAGE *)hcp->hdr)) != 0) + goto err; + + /* + * Create the first and second buckets pages so that we have the + * page numbers for them and we can store that page number + * in the meta-data header (spares[0]). + */ + hcp->hdr->spares[0] = nbuckets; + if ((ret = CDB_memp_fget(dbp->mpf, + &hcp->hdr->spares[0], DB_MPOOL_NEW_GROUP, &h)) != 0) + goto err; + + P_INIT(h, dbp->pgsize, hcp->hdr->spares[0], PGNO_INVALID, + PGNO_INVALID, 0, P_HASH); + + /* Fill in the last fields of the meta data page. 
*/ + hcp->hdr->spares[0] -= (nbuckets - 1); + for (i = 1; i <= l2; i++) + hcp->hdr->spares[i] = hcp->hdr->spares[0]; + for (; i < NCACHED; i++) + hcp->hdr->spares[i] = PGNO_INVALID; + + /* + * Before we are about to put any dirty pages, we need to log + * the meta-data page create. + */ + ret = CDB___db_log_page(dbp, name, &orig_lsn, pgno, (PAGE *)hcp->hdr); + + if (dbp->open_txn != NULL) { + if ((t_ret = CDB___ham_groupalloc_log(dbp->dbenv, dbp->open_txn, + &hcp->hdr->dbmeta.lsn, 0, dbp->log_fileid, + hcp->hdr->dbmeta.pgno, &hcp->hdr->dbmeta.lsn, + mmeta == NULL ? &hcp->hdr->dbmeta.lsn : &mmeta->lsn, + hcp->hdr->spares[0], hcp->hdr->max_bucket + 1)) != 0 && + ret == 0) + ret = t_ret; + if (t_ret == 0 && mmeta != NULL) { + mmeta->lsn = hcp->hdr->dbmeta.lsn; + dirty_mmeta = 1; + } + } + + DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name); + +DB_TEST_RECOVERY_LABEL + if ((t_ret = CDB_memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0 && ret == 0) + ret = t_ret; + +err: if (mmeta != NULL) + if ((t_ret = CDB_memp_fput(dbp->mpf, mmeta, + dirty_mmeta ? 
DB_MPOOL_DIRTY : 0)) != 0 && ret == 0) + ret = t_ret; + if (metalock.off != LOCK_INVALID) + (void)__TLPUT(dbc, metalock); + + return (ret); +} + +static int +CDB___ham_delete(dbp, txn, key, flags) + DB *dbp; + DB_TXN *txn; + DBT *key; + u_int32_t flags; +{ + DBC *dbc; + HASH_CURSOR *hcp; + int ret, t_ret; + + PANIC_CHECK(dbp->dbenv); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del"); + + if ((ret = + CDB___db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) + return (ret); + + if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) + return (ret); + + DEBUG_LWRITE(dbc, txn, "ham_delete", key, NULL, flags); + + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = CDB___ham_get_meta(dbc)) != 0) + goto out; + + if ((ret = CDB___ham_lookup(dbc, key, 0, DB_LOCK_WRITE)) == 0) { + if (F_ISSET(hcp, H_OK)) + ret = CDB___ham_del_pair(dbc, 1); + else + ret = DB_NOTFOUND; + } + + if ((t_ret = CDB___ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + +out: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* ****************** CURSORS ********************************** */ +/* + * CDB___ham_c_init -- + * Initialize the hash-specific portion of a cursor. + * + * PUBLIC: int CDB___ham_c_init __P((DBC *)); + */ +int +CDB___ham_c_init(dbc) + DBC *dbc; +{ + HASH_CURSOR *new_curs; + int ret; + + if ((ret = CDB___os_calloc(1, sizeof(struct cursor_t), &new_curs)) != 0) + return (ret); + if ((ret = + CDB___os_malloc(dbc->dbp->pgsize, NULL, &new_curs->split_buf)) != 0) { + CDB___os_free(new_curs, sizeof(*new_curs)); + return (ret); + } + + new_curs->dbc = dbc; + + dbc->internal = new_curs; + dbc->c_am_close = CDB___ham_c_close; + dbc->c_am_destroy = CDB___ham_c_destroy; + dbc->c_del = CDB___ham_c_del; + dbc->c_get = CDB___ham_c_get; + dbc->c_put = CDB___ham_c_put; + + CDB___ham_item_init(new_curs); + + return (0); +} + +/* + * CDB___ham_c_close -- + * Close down the cursor from a single use. 
+ */ +static int +CDB___ham_c_close(dbc) + DBC *dbc; +{ + int ret; + + if ((ret = CDB___ham_item_done(dbc, 0)) != 0) + return (ret); + + CDB___ham_item_init((HASH_CURSOR *)dbc->internal); + return (0); +} + +/* + * CDB___ham_c_destroy -- + * Cleanup the access method private part of a cursor. + */ +static int +CDB___ham_c_destroy(dbc) + DBC *dbc; +{ + HASH_CURSOR *hcp; + + hcp = (HASH_CURSOR *)dbc->internal; + if (hcp->split_buf != NULL) + CDB___os_free(hcp->split_buf, dbc->dbp->pgsize); + CDB___os_free(hcp, sizeof(HASH_CURSOR)); + + return (0); +} + +static int +CDB___ham_c_del(dbc, flags) + DBC *dbc; + u_int32_t flags; +{ + DB *dbp; + DBT repldbt; + HASH_CURSOR *hcp; + HASH_CURSOR save_curs; + db_pgno_t ppgno, chg_pgno; + int ret, t_ret; + + DEBUG_LWRITE(dbc, dbc->txn, "ham_c_del", NULL, NULL, flags); + dbp = dbc->dbp; + PANIC_CHECK(dbp->dbenv); + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = CDB___db_cdelchk(dbc->dbp, flags, + F_ISSET(dbc->dbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0) + return (ret); + + if (F_ISSET(hcp, H_DELETED)) + return (DB_NOTFOUND); + + /* + * If we are in the concurrent DB product and this cursor + * is not a write cursor, then this request is invalid. + * If it is a simple write cursor, then we need to upgrade its + * lock. + */ + if (F_ISSET(dbp->dbenv, DB_ENV_CDB)) { + /* Make sure it's a valid update cursor. */ + if (!F_ISSET(dbc, DBC_WRITECURSOR | DBC_WRITER)) + return (EPERM); + + if (F_ISSET(dbc, DBC_WRITECURSOR) && + (ret = CDB_lock_get(dbp->dbenv, dbc->locker, + DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, + &dbc->mylock)) != 0) + return (ret); + } + + SAVE_CURSOR(hcp, &save_curs); + + if ((ret = CDB___ham_get_meta(dbc)) != 0) + goto out; + + if ((ret = CDB___ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0) + goto out; + if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) { + /* + * We are about to remove a duplicate from offpage. + * + * There are 4 cases. + * 1. 
We will remove an item on a page, but there are more + * items on that page. + * 2. We will remove the last item on a page, but there is a + * following page of duplicates. + * 3. We will remove the last item on a page, this page was the + * last page in a duplicate set, but there were dups before + * it. + * 4. We will remove the last item on a page, removing the last + * duplicate. + * In case 1 hcp->dpagep is unchanged. + * In case 2 hcp->dpagep comes back pointing to the next dup + * page. + * In case 3 hcp->dpagep comes back NULL. + * In case 4 hcp->dpagep comes back NULL. + * + * Case 4 results in deleting the pair off the master page. + * The normal code for doing this knows how to delete the + * duplicates, so we will handle this case in the normal code. + */ + ppgno = PREV_PGNO(hcp->dpagep); + if (ppgno == PGNO_INVALID && + NEXT_PGNO(hcp->dpagep) == PGNO_INVALID && + NUM_ENT(hcp->dpagep) == 1) + goto normal; + + /* Remove item from duplicate page. */ + chg_pgno = hcp->dpgno; + if ((ret = CDB___db_drem(dbc, &hcp->dpagep, hcp->dndx)) != 0) + goto out; + + if (hcp->dpagep == NULL) { + if (ppgno != PGNO_INVALID) { /* Case 3 */ + hcp->dpgno = ppgno; + if ((ret = CDB___ham_get_cpage(dbc, + DB_LOCK_READ)) != 0) + goto out; + hcp->dndx = NUM_ENT(hcp->dpagep); + F_SET(hcp, H_DELETED); + } else { /* Case 4 */ + ret = CDB___ham_del_pair(dbc, 1); + hcp->dpgno = PGNO_INVALID; + /* + * Delpair updated the cursor queue, so we + * don't have to do that here. + */ + chg_pgno = PGNO_INVALID; + } + } else if (PGNO(hcp->dpagep) != hcp->dpgno) { + hcp->dndx = 0; /* Case 2 */ + hcp->dpgno = PGNO(hcp->dpagep); + if (ppgno == PGNO_INVALID) + memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep, + H_DATAINDEX(hcp->bndx))), + &hcp->dpgno, sizeof(db_pgno_t)); + /* + * We need to put the master page here, because + * although we have a duplicate page, the master + * page is dirty, and ham_item_done assumes that + * if you have a duplicate page, it's the only one + * that can be dirty. 
+ */ + ret = CDB___ham_put_page(dbp, hcp->pagep, 1); + hcp->pagep = NULL; + F_SET(hcp, H_DELETED); + } else /* Case 1 */ + F_SET(hcp, H_DELETED); + if (chg_pgno != PGNO_INVALID) + CDB___ham_c_update(hcp, chg_pgno, 0, 0, 1); + } else if (F_ISSET(hcp, H_ISDUP)) { /* on page */ + if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) == + LEN_HDATA(hcp->pagep, hcp->hdr->dbmeta.pagesize, hcp->bndx)) + ret = CDB___ham_del_pair(dbc, 1); + else { + repldbt.flags = 0; + F_SET(&repldbt, DB_DBT_PARTIAL); + repldbt.doff = hcp->dup_off; + repldbt.dlen = DUP_SIZE(hcp->dup_len); + repldbt.size = 0; + repldbt.data = + HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx)); + ret = CDB___ham_replpair(dbc, &repldbt, 0); + hcp->dup_tlen -= DUP_SIZE(hcp->dup_len); + F_SET(hcp, H_DELETED); + CDB___ham_c_update(hcp, hcp->pgno, + DUP_SIZE(hcp->dup_len), 0, 1); + } + + } else + /* Not a duplicate */ +normal: ret = CDB___ham_del_pair(dbc, 1); + +out: if ((t_ret = CDB___ham_item_done(dbc, ret == 0)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = CDB___ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + RESTORE_CURSOR(dbp, hcp, &save_curs, ret); + if (F_ISSET(dbc, DBC_WRITECURSOR)) + (void)CDB___lock_downgrade(dbp->dbenv, + &dbc->mylock, DB_LOCK_IWRITE, 0); + return (ret); +} + +/* + * CDB___ham_c_dup -- + * Duplicate a hash cursor, such that the new one holds appropriate + * locks for the position of the original. 
+ * + * PUBLIC: int CDB___ham_c_dup __P((DBC *, DBC *)); + */ +int +CDB___ham_c_dup(orig_dbc, new_dbc) + DBC *orig_dbc, *new_dbc; +{ + HASH_CURSOR *orig, *new; + + orig = (HASH_CURSOR *)orig_dbc->internal; + new = (HASH_CURSOR *)new_dbc->internal; + +#ifdef DIAGNOSTIC + memset(new, 0, sizeof(*new)); +#endif + new->dbc = orig->dbc; + new->bucket = orig->bucket; + new->lbucket = orig->lbucket; + new->pgno = orig->pgno; + new->bndx = orig->bndx; + new->dpgno = orig->dpgno; + new->dndx = orig->dndx; + new->dup_off = orig->dup_off; + new->dup_len = orig->dup_len; + new->dup_tlen = orig->dup_tlen; + + if (F_ISSET(orig, H_DELETED)) + F_SET(new, H_DELETED); + if (F_ISSET(orig, H_ISDUP)) + F_SET(new, H_ISDUP); + + /* + * If the old cursor held a lock and we're not in transactions, get one + * for the new one. The reason that we don't need a new lock if we're + * in a transaction is because we already hold a lock and will continue + * to do so until commit, so there is no point in reaquiring it. We + * don't know if the old lock was a read or write lock, but it doesn't + * matter. We'll get a read lock. We know that this locker already + * holds a lock of the correct type, so if we need a write lock and + * request it, we know that we'll get it. + */ + if (orig->lock.off == LOCK_INVALID || orig_dbc->txn != NULL) { + new->lock.off = LOCK_INVALID; + return (0); + } + + return (CDB___ham_lock_bucket(new_dbc, DB_LOCK_READ)); +} + +static int +CDB___ham_c_get(dbc, key, data, flags) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t flags; +{ + DB *dbp; + HASH_CURSOR *hcp, save_curs; + db_lockmode_t lock_type; + int get_key, ret, t_ret; + + DEBUG_LREAD(dbc, dbc->txn, "ham_c_get", + flags == DB_SET || flags == DB_SET_RANGE ? 
key : NULL, + NULL, flags); + + hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; + PANIC_CHECK(dbp->dbenv); + if ((ret = CDB___db_cgetchk(dbp, key, data, flags, IS_VALID(hcp))) != 0) + return (ret); + + /* Clear OR'd in additional bits so we can check for flag equality. */ + if (LF_ISSET(DB_RMW)) { + lock_type = DB_LOCK_WRITE; + LF_CLR(DB_RMW); + } else + lock_type = DB_LOCK_READ; + + SAVE_CURSOR(hcp, &save_curs); + if ((ret = CDB___ham_get_meta(dbc)) != 0) + return (ret); + hcp->seek_size = 0; + + ret = 0; + get_key = 1; + switch (flags) { + case DB_PREV: + if (hcp->bucket != BUCKET_INVALID) { + ret = CDB___ham_item_prev(dbc, lock_type); + break; + } + /* FALLTHROUGH */ + case DB_LAST: + ret = CDB___ham_item_last(dbc, lock_type); + break; + case DB_NEXT: + if (hcp->bucket != BUCKET_INVALID) { + ret = CDB___ham_item_next(dbc, lock_type); + break; + } + /* FALLTHROUGH */ + case DB_FIRST: + ret = CDB___ham_item_first(dbc, lock_type); + break; + case DB_NEXT_DUP: + /* cgetchk has already determined that the cursor is set. */ + F_SET(hcp, H_DUPONLY); + ret = CDB___ham_item_next(dbc, lock_type); + break; + case DB_SET: + case DB_SET_RANGE: + case DB_GET_BOTH: + if (F_ISSET(dbc, DBC_CONTINUE)) { + F_SET(hcp, H_DUPONLY); + ret = CDB___ham_item_next(dbc, lock_type); + } else + ret = CDB___ham_lookup(dbc, key, 0, lock_type); + get_key = 0; + break; + case DB_CURRENT: + /* cgetchk has already determined that the cursor is set. */ + if (F_ISSET(hcp, H_DELETED)) { + ret = DB_KEYEMPTY; + goto err1; + } + + ret = CDB___ham_item(dbc, lock_type); + break; + } + + /* + * Must always enter this loop to do error handling and + * check for big key/data pair. + */ + while (1) { + if (ret != 0 && ret != DB_NOTFOUND) + goto err2; + else if (F_ISSET(hcp, H_OK)) { + /* Get the key. 
*/ + if (get_key && (ret = CDB___db_ret(dbp, hcp->pagep, + H_KEYINDEX(hcp->bndx), key, &dbc->rkey.data, + &dbc->rkey.size)) != 0) + goto err2; + + ret = CDB___ham_dup_return(dbc, data, flags); + break; + } else if (!F_ISSET(hcp, H_NOMORE)) { + abort(); + break; + } + + /* + * Ran out of entries in a bucket; change buckets. + */ + switch (flags) { + case DB_LAST: + case DB_PREV: + ret = CDB___ham_item_done(dbc, 0); + if (hcp->bucket == 0) { + ret = DB_NOTFOUND; + goto err2; + } + hcp->bucket--; + hcp->bndx = NDX_INVALID; + if (ret == 0) + ret = CDB___ham_item_prev(dbc, lock_type); + break; + case DB_FIRST: + case DB_NEXT: + ret = CDB___ham_item_done(dbc, 0); + hcp->bndx = NDX_INVALID; + hcp->bucket++; + hcp->pgno = PGNO_INVALID; + hcp->pagep = NULL; + if (hcp->bucket > hcp->hdr->max_bucket) { + ret = DB_NOTFOUND; + goto err2; + } + if (ret == 0) + ret = CDB___ham_item_next(dbc, lock_type); + break; + case DB_GET_BOTH: + case DB_NEXT_DUP: + case DB_SET: + case DB_SET_RANGE: + /* Key not found. */ + ret = DB_NOTFOUND; + goto err2; + case DB_CURRENT: + /* + * This should only happen if you are doing + * deletes and reading with concurrent threads + * and not doing proper locking. We return + * the same error code as we would if the + * cursor were deleted. + */ + ret = DB_KEYEMPTY; + goto err2; + } + } + +err2: if ((t_ret = CDB___ham_item_done(dbc, 0)) != 0 && ret == 0) + ret = t_ret; +err1: if ((t_ret = CDB___ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + + RESTORE_CURSOR(dbp, hcp, &save_curs, ret); + + F_CLR(hcp, H_DUPONLY); + + return (ret); +} + +static int +CDB___ham_c_put(dbc, key, data, flags) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t flags; +{ + DB *dbp; + DBT tmp_val, *myval; + HASH_CURSOR *hcp, save_curs; + u_int32_t nbytes; + int ret, t_ret; + + /* + * The compiler doesn't realize that we only use this when ret is + * equal to 0 and that if ret is equal to 0, that we must have set + * myval. 
So, we initialize it here to shut the compiler up. + */ + COMPQUIET(myval, NULL); + + dbp = dbc->dbp; + PANIC_CHECK(dbp->dbenv); + DEBUG_LWRITE(dbc, dbc->txn, "ham_c_put", + flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL, + data, flags); + hcp = (HASH_CURSOR *)dbc->internal; + + if ((ret = CDB___db_cputchk(dbp, key, data, flags, + F_ISSET(dbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0) + return (ret); + + if (F_ISSET(hcp, H_DELETED) && + flags != DB_KEYFIRST && flags != DB_KEYLAST) + return (DB_NOTFOUND); + + /* + * If we are in the concurrent DB product and this cursor + * is not a write cursor, then this request is invalid. + * If it is a simple write cursor, then we need to upgrade its + * lock. + */ + if (F_ISSET(dbp->dbenv, DB_ENV_CDB)) { + /* Make sure it's a valid update cursor. */ + if (!F_ISSET(dbc, DBC_WRITECURSOR | DBC_WRITER)) + return (EPERM); + + if (F_ISSET(dbc, DBC_WRITECURSOR) && + (ret = CDB_lock_get(dbp->dbenv, dbc->locker, + DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, + &dbc->mylock)) != 0) + return (ret); + } + + SAVE_CURSOR(hcp, &save_curs); + + if ((ret = CDB___ham_get_meta(dbc)) != 0) + goto err1; + + switch (flags) { + case DB_KEYLAST: + case DB_KEYFIRST: + nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(key->size)) + + (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(data->size)); + if ((ret = CDB___ham_lookup(dbc, + key, nbytes, DB_LOCK_WRITE)) == DB_NOTFOUND) { + ret = 0; + if (hcp->seek_found_page != PGNO_INVALID && + hcp->seek_found_page != hcp->pgno) { + if ((ret = CDB___ham_item_done(dbc, 0)) != 0) + goto err2; + hcp->pgno = hcp->seek_found_page; + hcp->bndx = NDX_INVALID; + } + + if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) { + /* + * A partial put, but the key does not exist + * and we are not beginning the write at 0. + * We must create a data item padded up to doff + * and then write the new bytes represented by + * val. 
+ */ + if ((ret = CDB___ham_init_dbt(&tmp_val, + data->size + data->doff, + &dbc->rdata.data, &dbc->rdata.size)) == 0) { + memset(tmp_val.data, 0, data->doff); + memcpy((u_int8_t *)tmp_val.data + + data->doff, data->data, data->size); + myval = &tmp_val; + } + } else + myval = (DBT *)data; + + if (ret == 0) + ret = CDB___ham_add_el(dbc, key, myval, H_KEYDATA); + goto done; + } + break; + case DB_BEFORE: + case DB_AFTER: + case DB_CURRENT: + ret = CDB___ham_item(dbc, DB_LOCK_WRITE); + break; + } + + if (ret == 0) { + if (flags == DB_CURRENT || + ((flags == DB_KEYFIRST || flags == DB_KEYLAST) && + !F_ISSET(dbp, DB_AM_DUP))) + ret = CDB___ham_overwrite(dbc, data); + else + ret = CDB___ham_add_dup(dbc, data, flags); + } + +done: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) { + ret = CDB___ham_expand_table(dbc); + F_CLR(hcp, H_EXPAND); + } + + if ((t_ret = CDB___ham_item_done(dbc, ret == 0)) != 0 && ret == 0) + ret = t_ret; + +err2: if ((t_ret = CDB___ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + +err1: RESTORE_CURSOR(dbp, hcp, &save_curs, ret); + + + if (F_ISSET(dbc, DBC_WRITECURSOR)) + (void)CDB___lock_downgrade(dbp->dbenv, + &dbc->mylock, DB_LOCK_IWRITE, 0); + + return (ret); +} + +/********************************* UTILITIES ************************/ + +/* + * CDB___ham_expand_table -- + */ +static int +CDB___ham_expand_table(dbc) + DBC *dbc; +{ + DB *dbp; + PAGE *h; + HASH_CURSOR *hcp; + db_pgno_t pgno; + u_int32_t old_bucket, new_bucket; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = CDB___ham_dirty_meta(dbc)) != 0) + return (ret); + + /* + * If the split point is about to increase, make sure that we + * have enough extra pages. The calculation here is weird. + * We'd like to do this after we've upped max_bucket, but it's + * too late then because we've logged the meta-data split. 
What + * we'll do between then and now is increment max bucket and then + * see what the log of one greater than that is; here we have to + * look at the log of max + 2. VERY NASTY STUFF. + * + * It just got even nastier. With subdatabases, we have to request + * a chunk of contiguous pages, so we do that here using an + * undocumented feature of mpool (the MPOOL_NEW_GROUP flag) to + * give us a number of contiguous pages. Ouch. + */ + if (hcp->hdr->max_bucket == hcp->hdr->high_mask) { + /* + * Ask mpool to give us a set of contiguous page numbers + * large enough to contain the next doubling. + * + * Figure out how many new pages we need. This will return + * us the last page. We calculate its page number, initialize + * the page and then write it back to reserve all the pages + * in between. It is possible that the allocation of new pages + * has already been done, but the tranaction aborted. Since + * we don't undo the allocation, check for a valid pgno before + * doing the allocation. + */ + pgno = hcp->hdr->max_bucket + 1; + if (hcp->hdr->spares[CDB___db_log2(pgno) + 1] == PGNO_INVALID) + /* Allocate a group of pages. */ + ret = CDB_memp_fget(dbp->mpf, + &pgno, DB_MPOOL_NEW_GROUP, &h); + else { + /* Just read in the last page of the batch */ + pgno = hcp->hdr->spares[CDB___db_log2(pgno) + 1] + + hcp->hdr->max_bucket + 1; + ret = CDB_memp_fget(dbp->mpf, + &pgno, DB_MPOOL_CREATE, &h); + } + if (ret != 0) + return (ret); + + P_INIT(h, dbp->pgsize, pgno, + PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + pgno -= hcp->hdr->max_bucket; + } else { + pgno = BUCKET_TO_PAGE(hcp, hcp->hdr->max_bucket + 1); + if ((ret = + CDB_memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0) + return (ret); + } + + /* Now we can log the meta-data split. 
*/ + if (DB_LOGGING(dbc)) { + if ((ret = CDB___ham_metagroup_log(dbp->dbenv, + dbc->txn, &h->lsn, 0, dbp->log_fileid, + hcp->hdr->max_bucket, pgno, &hcp->hdr->dbmeta.lsn, + &h->lsn)) != 0) + return (ret); + + hcp->hdr->dbmeta.lsn = h->lsn; + } + + /* If we allocated some new pages, write out the last page. */ + if ((ret = CDB_memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) + return (ret); + + new_bucket = ++hcp->hdr->max_bucket; + old_bucket = (hcp->hdr->max_bucket & hcp->hdr->low_mask); + + /* + * If we started a new doubling, fill in the spares array with + * the starting page number negatively offset by the bucket number. + */ + if (new_bucket > hcp->hdr->high_mask) { + /* Starting a new doubling */ + hcp->hdr->low_mask = hcp->hdr->high_mask; + hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask; + if (hcp->hdr->spares[CDB___db_log2(new_bucket) + 1] == PGNO_INVALID) + hcp->hdr->spares[CDB___db_log2(new_bucket) + 1] = + pgno - new_bucket; + } + + /* Relocate records to the new bucket */ + return (CDB___ham_split_page(dbc, old_bucket, new_bucket)); +} + +/* + * PUBLIC: u_int32_t CDB___ham_call_hash __P((HASH_CURSOR *, u_int8_t *, int32_t)); + */ +u_int32_t +CDB___ham_call_hash(hcp, k, len) + HASH_CURSOR *hcp; + u_int8_t *k; + int32_t len; +{ + u_int32_t n, bucket; + HASH *hashp; + + hashp = hcp->dbc->dbp->h_internal; + n = (u_int32_t)(hashp->h_hash(k, len)); + + bucket = n & hcp->hdr->high_mask; + if (bucket > hcp->hdr->max_bucket) + bucket = bucket & hcp->hdr->low_mask; + return (bucket); +} + +/* + * Check for duplicates, and call CDB___db_ret appropriately. Release + * everything held by the cursor. + */ +static int +CDB___ham_dup_return(dbc, val, flags) + DBC *dbc; + DBT *val; + u_int32_t flags; +{ + DB *dbp; + HASH_CURSOR *hcp; + PAGE *pp; + DBT *myval, tmp_val; + db_indx_t ndx; + db_pgno_t pgno; + u_int32_t off, tlen; + u_int8_t *hk, type; + int cmp, ret; + db_indx_t len; + + /* Check for duplicate and return the first one. 
 */
	dbp = dbc->dbp;
	hcp = (HASH_CURSOR *)dbc->internal;
	ndx = H_DATAINDEX(hcp->bndx);
	type = HPAGE_TYPE(hcp->pagep, ndx);
	pp = hcp->pagep;
	myval = val;

	/*
	 * There are 4 cases:
	 * 1. We are not in duplicate, simply call db_ret.
	 * 2. We are looking at keys and stumbled onto a duplicate.
	 * 3. We are in the middle of a duplicate set. (ISDUP set)
	 * 4. This is a duplicate and we need to return a specific item.
	 */

	/*
	 * Here we check for the case where we just stumbled onto a
	 * duplicate.  In this case, we do initialization and then
	 * let the normal duplicate code handle it.
	 */
	if (!F_ISSET(hcp, H_ISDUP)) {
		if (type == H_DUPLICATE) {
			F_SET(hcp, H_ISDUP);
			hcp->dup_tlen = LEN_HDATA(hcp->pagep,
			    hcp->hdr->dbmeta.pagesize, hcp->bndx);
			hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
			if (flags == DB_LAST || flags == DB_PREV) {
				/*
				 * Walk the on-page duplicate set forward to
				 * find the last element.  Each element is
				 * prefixed by a db_indx_t length word, so
				 * DUP_SIZE(len) steps over one whole entry.
				 */
				hcp->dndx = 0;
				hcp->dup_off = 0;
				do {
					memcpy(&len,
					    HKEYDATA_DATA(hk) + hcp->dup_off,
					    sizeof(db_indx_t));
					hcp->dup_off += DUP_SIZE(len);
					hcp->dndx++;
				} while (hcp->dup_off < hcp->dup_tlen);
				/* Back up onto the final element. */
				hcp->dup_off -= DUP_SIZE(len);
				hcp->dndx--;
			} else {
				/* Position on the first duplicate. */
				memcpy(&len,
				    HKEYDATA_DATA(hk), sizeof(db_indx_t));
				hcp->dup_off = 0;
				hcp->dndx = 0;
			}
			hcp->dup_len = len;
		} else if (type == H_OFFDUP) {
			/* Duplicates live on separate duplicate pages. */
			F_SET(hcp, H_ISDUP);
			if (flags == DB_CURRENT) {
				pgno = hcp->dpgno;
				ndx = hcp->dndx;
			} else
				memcpy(&pgno,
				    HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
				    sizeof(db_pgno_t));
			if (flags == DB_LAST || flags == DB_PREV) {
				/* Seek to the end of the duplicate chain. */
				if ((ret = CDB___db_dend(dbc,
				    pgno, &hcp->dpagep)) != 0)
					return (ret);
				hcp->dpgno = PGNO(hcp->dpagep);
				hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
			} else if ((ret = CDB___ham_next_cpage(dbc,
			    pgno, 0, H_ISDUP)) != 0)
				return (ret);
			if (flags == DB_CURRENT)
				hcp->dndx = ndx;
		}
	}

	/*
	 * If we are retrieving a specific key/data pair, then we
	 * may need to adjust the cursor before returning data.
 */
	if (flags == DB_GET_BOTH) {
		if (F_ISSET(hcp, H_ISDUP)) {
			if (hcp->dpgno != PGNO_INVALID) {
				/* Search the off-page duplicate chain. */
				if ((ret = CDB___db_dsearch(dbc, 0, val,
				    hcp->dpgno, &hcp->dndx, &hcp->dpagep, &cmp))
				    != 0)
					return (ret);
				if (cmp == 0)
					hcp->dpgno = PGNO(hcp->dpagep);
			} else {
				/* Search the on-page duplicate set. */
				CDB___ham_dsearch(dbc, val, &off, &cmp);
				hcp->dup_off = off;
			}
		} else {
			hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
			if (((HKEYDATA *)hk)->type == H_OFFPAGE) {
				/* Compare against an off-page data item. */
				memcpy(&tlen,
				    HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
				memcpy(&pgno,
				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
				if ((ret = CDB___db_moff(dbp, val,
				    pgno, tlen, dbp->dup_compare, &cmp)) != 0)
					return (ret);
			} else {
				/*
				 * We do not zero tmp_val since the comparison
				 * routines may only look at data and size.
				 */
				tmp_val.data = HKEYDATA_DATA(hk);
				tmp_val.size = LEN_HDATA(hcp->pagep,
				    dbp->pgsize, hcp->bndx);
				cmp = dbp->dup_compare == NULL ?
				    CDB___bam_defcmp(&tmp_val, val) :
				    dbp->dup_compare(&tmp_val, val);
			}
		}

		if (cmp != 0)
			return (DB_NOTFOUND);
	}

	/*
	 * Now, everything is initialized, grab a duplicate if
	 * necessary.
	 */
	if (F_ISSET(hcp, H_ISDUP)) {
		if (hcp->dpgno != PGNO_INVALID) {
			/* Off-page duplicate: return from the dup page. */
			pp = hcp->dpagep;
			ndx = hcp->dndx;
		} else {
			/*
			 * Copy the DBT in case we are retrieving into user
			 * memory and we need the parameters for it.  If the
			 * user requested a partial, then we need to adjust
			 * the user's parameters to get the partial of the
			 * duplicate which is itself a partial.
			 */
			memcpy(&tmp_val, val, sizeof(*val));
			if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) {
				/*
				 * Take the user's length unless it would go
				 * beyond the end of the duplicate.
				 */
				if (tmp_val.doff + hcp->dup_off > hcp->dup_len)
					tmp_val.dlen = 0;
				else if (tmp_val.dlen + tmp_val.doff >
				    hcp->dup_len)
					tmp_val.dlen =
					    hcp->dup_len - tmp_val.doff;

				/*
				 * Calculate the new offset.
+ */ + tmp_val.doff += hcp->dup_off; + } else { + F_SET(&tmp_val, DB_DBT_PARTIAL); + tmp_val.dlen = hcp->dup_len; + tmp_val.doff = hcp->dup_off + sizeof(db_indx_t); + } + myval = &tmp_val; + } + } + + + /* + * Finally, if we had a duplicate, pp, ndx, and myval should be + * set appropriately. + */ + if ((ret = CDB___db_ret(dbp, pp, ndx, myval, &dbc->rdata.data, + &dbc->rdata.size)) != 0) + return (ret); + + /* + * In case we sent a temporary off to db_ret, set the real + * return values. + */ + val->data = myval->data; + val->size = myval->size; + + return (0); +} + +static int +CDB___ham_overwrite(dbc, nval) + DBC *dbc; + DBT *nval; +{ + HASH_CURSOR *hcp; + DBT *myval, tmp_val, tmp_val2; + void *newrec; + u_int8_t *hk, *p; + u_int32_t len, nondup_size; + db_pgno_t prev; + db_indx_t newsize, dndx; + int ret; + + hcp = (HASH_CURSOR *)dbc->internal; + if (F_ISSET(hcp, H_ISDUP)) { + /* + * This is an overwrite of a duplicate; check for + * onpage versus offpage and whether it's partial. + */ + if (hcp->dpagep != NULL) { +do_offpage: if (F_ISSET(nval, DB_DBT_PARTIAL)) { + /* + * We are using btree routines that are + * actually OK for hash to use. Since all + * dbps have bt_internal initialized, this + * *should* just work. + */ + newsize = CDB___bam_partsize( + DB_CURRENT, nval, hcp->dpagep, hcp->dndx); + memcpy(&tmp_val, nval, sizeof(tmp_val)); + if ((ret = + CDB___bam_build(dbc, DB_CURRENT, &tmp_val, + hcp->dpagep, hcp->dndx, newsize)) != 0) + return (ret); + myval = &tmp_val; + } else + myval = nval; + + /* + * Make sure that the caller isn't corrupting + * the sort order. + */ + if (dbc->dbp->dup_compare != NULL && + CDB___bam_cmp(dbc->dbp, myval, hcp->dpagep, + hcp->dndx, dbc->dbp->dup_compare) != 0) + return (EINVAL); + + prev = PREV_PGNO(hcp->dpagep); + if ((ret = + CDB___db_drem(dbc, &hcp->dpagep, hcp->dndx)) != 0) + return (ret); + /* + * It's possible that hcp->dpagep is now NULL. If + * we have a prev, we can deal pretty easily; if not + * this gets ugly. 
+ */ + if (hcp->dpagep == NULL) { + if (prev == PGNO_INVALID) { + /* + * This was a duplicate page with + * a single item. Pretend to reenter + * this routine simply overwriting the + * entry on the main page. + */ + F_CLR(hcp, H_ISDUP); + goto doreplace; + } + if ((ret = CDB___ham_next_cpage(dbc, + prev, 0, H_ISDUP)) != 0) + return (ret); + hcp->dndx = NUM_ENT(hcp->dpagep); + } + + /* + * On page splits, the 4th parameter of db_dput returns + * the location the new item was put. We cannot pass + * in permanent fields from the cursor, they may have + * been updated in cursor adjustment. + */ + dndx = hcp->dndx; + ret = CDB___db_dput(dbc, myval, &hcp->dpagep, &dndx); + hcp->dpgno = PGNO(hcp->dpagep); + hcp->dndx = dndx; + return (ret); + } + + /* On page dups */ + if (F_ISSET(nval, DB_DBT_PARTIAL)) { + /* + * We're going to have to get the current item, then + * construct the record, do any padding and do a + * replace. + */ + memset(&tmp_val, 0, sizeof(tmp_val)); + if ((ret = + CDB___ham_dup_return(dbc, &tmp_val, DB_CURRENT)) != 0) + return (ret); + + /* Figure out new size. */ + nondup_size = tmp_val.size; + newsize = nondup_size; + + /* + * Three cases: + * 1. strictly append (may need to allocate space + * for pad bytes; really gross). + * 2. overwrite some and append. + * 3. strictly overwrite. + */ + if (nval->doff > nondup_size) + newsize += + (nval->doff - nondup_size + nval->size); + else if (nval->doff + nval->dlen > nondup_size) + newsize += nval->size - + (nondup_size - nval->doff); + else + newsize += nval->size - nval->dlen; + + /* + * Make sure that the new size doesn't put us over + * the onpage duplicate size in which case we need + * to convert to off-page duplicates. 
+ */ + if (ISBIG(hcp, hcp->dup_tlen - nondup_size + newsize)) { + if ((ret = CDB___ham_dup_convert(dbc)) != 0) + return (ret); + goto do_offpage; + } + + if ((ret = + CDB___os_malloc(DUP_SIZE(newsize), NULL, &newrec)) != 0) + return (ret); + memset(&tmp_val2, 0, sizeof(tmp_val2)); + F_SET(&tmp_val2, DB_DBT_PARTIAL); + + /* Construct the record. */ + p = newrec; + /* Initial size. */ + memcpy(p, &newsize, sizeof(db_indx_t)); + p += sizeof(db_indx_t); + + /* First part of original record. */ + len = nval->doff > tmp_val.size + ? tmp_val.size : nval->doff; + memcpy(p, tmp_val.data, len); + p += len; + + if (nval->doff > tmp_val.size) { + /* Padding */ + memset(p, 0, nval->doff - tmp_val.size); + p += nval->doff - tmp_val.size; + } + + /* New bytes */ + memcpy(p, nval->data, nval->size); + p += nval->size; + + /* End of original record (if there is any) */ + if (nval->doff + nval->dlen < tmp_val.size) { + len = tmp_val.size - nval->doff - nval->dlen; + memcpy(p, (u_int8_t *)tmp_val.data + + nval->doff + nval->dlen, len); + p += len; + } + + /* Final size. */ + memcpy(p, &newsize, sizeof(db_indx_t)); + + /* + * Make sure that the caller isn't corrupting + * the sort order. + */ + if (dbc->dbp->dup_compare != NULL) { + tmp_val2.data = + (u_int8_t *)newrec + sizeof(db_indx_t); + tmp_val2.size = newsize; + if (dbc->dbp->dup_compare(&tmp_val, &tmp_val2) + != 0) { + (void)CDB___os_free(newrec, + DUP_SIZE(newsize)); + return (EINVAL); + } + } + + tmp_val2.data = newrec; + tmp_val2.size = DUP_SIZE(newsize); + tmp_val2.doff = hcp->dup_off; + tmp_val2.dlen = DUP_SIZE(hcp->dup_len); + + ret = CDB___ham_replpair(dbc, &tmp_val2, 0); + (void)CDB___os_free(newrec, DUP_SIZE(newsize)); + + /* Update cursor */ + if (ret != 0) + return (ret); + + if (newsize > nondup_size) + hcp->dup_tlen += (newsize - nondup_size); + else + hcp->dup_tlen -= (nondup_size - newsize); + hcp->dup_len = DUP_SIZE(newsize); + return (0); + } else { + /* Check whether we need to convert to off page. 
*/ + if (ISBIG(hcp, + hcp->dup_tlen - hcp->dup_len + nval->size)) { + if ((ret = CDB___ham_dup_convert(dbc)) != 0) + return (ret); + goto do_offpage; + } + + /* Make sure we maintain sort order. */ + if (dbc->dbp->dup_compare != NULL) { + tmp_val2.data = + HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, + hcp->bndx)) + hcp->dup_off + + sizeof(db_indx_t); + tmp_val2.size = hcp->dup_len; + if (dbc->dbp->dup_compare(nval, &tmp_val2) != 0) + return (EINVAL); + } + /* Overwriting a complete duplicate. */ + if ((ret = CDB___ham_make_dup(nval, &tmp_val, + &dbc->rdata.data, &dbc->rdata.size)) != 0) + return (ret); + /* Now fix what we are replacing. */ + tmp_val.doff = hcp->dup_off; + tmp_val.dlen = DUP_SIZE(hcp->dup_len); + + /* Update cursor */ + if (nval->size > hcp->dup_len) + hcp->dup_tlen += (nval->size - hcp->dup_len); + else + hcp->dup_tlen -= (hcp->dup_len - nval->size); + hcp->dup_len = DUP_SIZE(nval->size); + } + myval = &tmp_val; + } else if (!F_ISSET(nval, DB_DBT_PARTIAL)) { + /* Put/overwrite */ + memcpy(&tmp_val, nval, sizeof(*nval)); + F_SET(&tmp_val, DB_DBT_PARTIAL); + tmp_val.doff = 0; + hk = H_PAIRDATA(hcp->pagep, hcp->bndx); + if (HPAGE_PTYPE(hk) == H_OFFPAGE) + memcpy(&tmp_val.dlen, + HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + else + tmp_val.dlen = LEN_HDATA(hcp->pagep, + hcp->hdr->dbmeta.pagesize,hcp->bndx); + myval = &tmp_val; + } else + /* Regular partial put */ + myval = nval; +doreplace: + return (CDB___ham_replpair(dbc, myval, 0)); +} + +/* + * Given a key and a cursor, sets the cursor to the page/ndx on which + * the key resides. If the key is found, the cursor H_OK flag is set + * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set. + * If the key is not found, the H_OK flag is not set. If the sought + * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields + * are set indicating where an add might take place. If it is 0, + * non of the cursor pointer field are valid. 
+ */ +static int +CDB___ham_lookup(dbc, key, sought, mode) + DBC *dbc; + const DBT *key; + u_int32_t sought; + db_lockmode_t mode; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_pgno_t pgno; + u_int32_t tlen; + int match, ret, t_ret; + u_int8_t *hk; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + /* + * Set up cursor so that we're looking for space to add an item + * as we cycle through the pages looking for the key. + */ + if ((ret = CDB___ham_item_reset(dbc)) != 0) + return (ret); + hcp->seek_size = sought; + + hcp->bucket = CDB___ham_call_hash(hcp, (u_int8_t *)key->data, key->size); + while (1) { + if ((ret = CDB___ham_item_next(dbc, mode)) != 0) + return (ret); + + if (F_ISSET(hcp, H_NOMORE)) + break; + + hk = H_PAIRKEY(hcp->pagep, hcp->bndx); + switch (HPAGE_PTYPE(hk)) { + case H_OFFPAGE: + memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + if (tlen == key->size) { + memcpy(&pgno, + HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + if ((ret = CDB___db_moff(dbp, + key, pgno, tlen, NULL, &match)) != 0) + return (ret); + if (match == 0) { + F_SET(hcp, H_OK); + return (0); + } + } + break; + case H_KEYDATA: + if (key->size == + LEN_HKEY(hcp->pagep, dbp->pgsize, hcp->bndx) && + memcmp(key->data, + HKEYDATA_DATA(hk), key->size) == 0) { + F_SET(hcp, H_OK); + return (0); + } + break; + case H_DUPLICATE: + case H_OFFDUP: + /* + * These are errors because keys are never + * duplicated, only data items are. + */ + return (CDB___db_pgfmt(dbp, PGNO(hcp->pagep))); + } + } + + /* + * Item was not found. + */ + + if (sought != 0) + return (ret); + + if ((t_ret = CDB___ham_item_done(dbc, 0)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * CDB___ham_init_dbt -- + * Initialize a dbt using some possibly already allocated storage + * for items. 
+ * + * PUBLIC: int CDB___ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *)); + */ +int +CDB___ham_init_dbt(dbt, size, bufp, sizep) + DBT *dbt; + u_int32_t size; + void **bufp; + u_int32_t *sizep; +{ + int ret; + + memset(dbt, 0, sizeof(*dbt)); + if (*sizep < size) { + if ((ret = CDB___os_realloc(size, NULL, bufp)) != 0) { + *sizep = 0; + return (ret); + } + *sizep = size; + } + dbt->data = *bufp; + dbt->size = size; + return (0); +} + +/* + * Adjust the cursor after an insert or delete. The cursor passed is + * the one that was operated upon; we just need to check any of the + * others. + * + * len indicates the length of the item added/deleted + * add indicates if the item indicated by the cursor has just been + * added (add == 1) or deleted (add == 0). + * dup indicates if the addition occurred into a duplicate set. + * + * PUBLIC: void CDB___ham_c_update + * PUBLIC: __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int)); + */ +void +CDB___ham_c_update(hcp, chg_pgno, len, add, is_dup) + HASH_CURSOR *hcp; + db_pgno_t chg_pgno; + u_int32_t len; + int add, is_dup; +{ + DB *dbp; + DBC *cp; + HASH_CURSOR *lcp; + int page_deleted; + + /* + * Regular adds are always at the end of a given page, so we never + * have to adjust anyone's cursor after a regular add. + */ + if (!is_dup && add) + return; + + /* + * Determine if a page was deleted. If this is a regular update + * (i.e., not is_dup) then the deleted page's number will be that in + * chg_pgno, and the pgno in the cursor will be different. If this + * was an onpage-duplicate, then the same conditions apply. If this + * was an off-page duplicate, then we need to verify if hcp->dpgno + * is the same (no delete) or different (delete) than chg_pgno. 
+ */ + if (!is_dup || hcp->dpgno == PGNO_INVALID) + page_deleted = + chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno; + else + page_deleted = + chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno; + + dbp = hcp->dbc->dbp; + MUTEX_THREAD_LOCK(dbp->mutexp); + + for (cp = TAILQ_FIRST(&dbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) { + if (cp->internal == hcp) + continue; + + lcp = (HASH_CURSOR *)cp->internal; + + if (!is_dup && lcp->pgno != chg_pgno) + continue; + + if (is_dup && + ((lcp->dpgno == PGNO_INVALID && lcp->pgno != chg_pgno) || + (lcp->dpgno != PGNO_INVALID && lcp->dpgno != chg_pgno))) + continue; + + if (is_dup && F_ISSET(hcp, H_DELETED)) { + if (lcp->dpgno == PGNO_INVALID) { + if (lcp->pgno != chg_pgno) + continue; + } else if (lcp->dpgno != chg_pgno) + continue; + } + + if (page_deleted) { + if (is_dup) { + lcp->dpgno = hcp->dpgno; + lcp->dndx = hcp->dndx; + } else { + lcp->pgno = hcp->pgno; + lcp->bndx = hcp->bndx; + lcp->bucket = hcp->bucket; + } + F_CLR(lcp, H_ISDUP); + continue; + } + + if (!is_dup && lcp->bndx > hcp->bndx) + lcp->bndx--; + else if (!is_dup && lcp->bndx == hcp->bndx) + if (add) + lcp->bndx++; + else + F_SET(lcp, H_DELETED); + else if (is_dup && hcp->dpgno != PGNO_INVALID && + hcp->dpgno == lcp->dpgno) { + /* Off-page duplicate. */ + if (add && lcp->dndx >= hcp->dndx ) + lcp->dndx++; + else if (!add && lcp->dndx > hcp->dndx) + lcp->dndx--; + else if (!add && lcp->dndx == hcp->dndx) + F_SET(lcp, H_DELETED); + + } else if (is_dup && lcp->pgno == chg_pgno && + lcp->bndx == hcp->bndx) { + /* On-page duplicate. 
 */
			if (add) {
				lcp->dup_tlen += len;
				if (lcp->dup_off > hcp->dup_off)
					lcp->dup_off += len;
				if (lcp->dup_off == hcp->dup_off)
					lcp->dup_len = len;
			} else {
				lcp->dup_tlen -= len;
				if (lcp->dup_off > hcp->dup_off)
					lcp->dup_off -= len;
				else if (lcp->dup_off == hcp->dup_off)
					F_SET(lcp, H_DELETED);
			}
		}
	}
	MUTEX_THREAD_UNLOCK(dbp->mutexp);
}

/*
 * CDB___ham_get_clist --
 *
 * Get a list of cursors either on a particular bucket or on a particular
 * page and index combination.  The former is so that we can update
 * cursors on a split.  The latter is so we can update cursors when we
 * move items off page.  The returned list is NULL-terminated; the caller
 * owns and frees it.
 *
 * PUBLIC: int CDB___ham_get_clist __P((DB *,
 * PUBLIC:     db_pgno_t, u_int32_t, HASH_CURSOR ***));
 */
int
CDB___ham_get_clist(dbp, bucket, indx, listp)
	DB *dbp;
	db_pgno_t bucket;	/* bucket, or pgno when indx != NDX_INVALID */
	u_int32_t indx;
	HASH_CURSOR ***listp;
{
	DBC *cp;
	int nalloc, nused, ret;

	/*
	 * Assume that finding anything is the exception, so optimize for
	 * the case where there aren't any.
	 */
	nalloc = nused = 0;
	*listp = NULL;

	MUTEX_THREAD_LOCK(dbp->mutexp);

	for (cp = TAILQ_FIRST(&dbp->active_queue); cp != NULL;
	    cp = TAILQ_NEXT(cp, links))
		if ((indx == NDX_INVALID &&
		    ((HASH_CURSOR *)(cp->internal))->bucket == bucket) ||
		    (indx != NDX_INVALID &&
		    ((HASH_CURSOR *)(cp->internal))->pgno == bucket &&
		    ((HASH_CURSOR *)(cp->internal))->bndx == indx)) {
			/* Grow the list in chunks of 10. */
			if (nused >= nalloc) {
				nalloc += 10;
				/*
				 * NOTE(review): this early return leaves
				 * dbp->mutexp locked -- looks like a lock
				 * leak on allocation failure; confirm and
				 * unlock before returning.
				 */
				if ((ret =
				    CDB___os_realloc(nalloc * sizeof(HASH_CURSOR *),
				    NULL, listp)) != 0)
					return (ret);
			}
			(*listp)[nused++] = (HASH_CURSOR *)cp->internal;
		}

	MUTEX_THREAD_UNLOCK(dbp->mutexp);
	/*
	 * NOTE(review): "listp != NULL" is always true here (listp is the
	 * address of the caller's variable); presumably "*listp != NULL"
	 * was intended.  As written, an empty scan also allocates a
	 * one-element list holding only the NULL terminator -- verify
	 * against callers before changing.
	 */
	if (listp != NULL) {
		if (nused >= nalloc) {
			nalloc++;
			if ((ret = CDB___os_realloc(nalloc * sizeof(HASH_CURSOR *),
			    NULL, listp)) != 0)
				return (ret);
		}
		/* NULL-terminate the list. */
		(*listp)[nused] = NULL;
	}
	return (0);
}