diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc | 590 |
1 files changed, 590 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc b/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc new file mode 100644 index 00000000..011cfc9e --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc @@ -0,0 +1,590 @@ +// +// WordCursorOne.cc +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: WordCursorOne.cc,v 1.4 2004/05/28 13:15:26 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <stdlib.h> + +#include "WordCursorOne.h" +#include "WordListOne.h" +#include "WordDead.h" + +#include <stdio.h> + +// +// WordCursorOne implementation +// + +// ***************************************************************************** +WordCursorOne::WordCursorOne(WordList *words) : + WordCursor(words->GetContext()), + prefixKey(words->GetContext()) +{ + Clear(); +} + +// ***************************************************************************** +WordCursorOne::WordCursorOne(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) : + WordCursor(words->GetContext()), + prefixKey(words->GetContext()) +{ + Clear(); + Initialize(words, WordKey(words->GetContext()), callback, callback_data, HTDIG_WORDLIST_WALKER); +} + +// ***************************************************************************** +WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) : + WordCursor(words->GetContext()), + prefixKey(words->GetContext()) +{ + Clear(); + Initialize(words, searchKey, 0, 0, action); +} + +// ***************************************************************************** +WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) : + WordCursor(words->GetContext()), + prefixKey(words->GetContext()) +{ + Clear(); + Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER); +} + +// ***************************************************************************** +// +int WordCursorOne::Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object *ncallback_data, int naction) +{ + action = naction; + searchKey = nsearchKey; + callback = ncallback; + callback_data = ncallback_data; + words = nwords; + cursor = ((WordListOne*)nwords)->db->Cursor(); + return OK; +} + +// ***************************************************************************** +// +void +WordCursorOne::Clear() +{ + searchKey.Clear(); + action = 0; + callback = 0; + callback_data = 0; + ClearResult(); + ClearInternal(); + words = 0; + + // + // Debugging section. + // + traceRes = 0; +} + +// ***************************************************************************** +// +void +WordCursorOne::ClearInternal() +{ + key.trunc(); + data.trunc(); + prefixKey.Clear(); + cursor_get_flags = DB_SET_RANGE; + searchKeyIsSameAsPrefix = 0; +} + +// ***************************************************************************** +// +void +WordCursorOne::ClearResult() +{ + collectRes = 0; + found.Clear(); + status = OK; +} + +int +WordCursorOne::ContextRestore(const String& buffer) +{ + int ret = OK; + if(!buffer.empty()) { + WordKey key(words->GetContext(), buffer); + if((ret = Seek(key)) != OK) + return ret; + // + // Move to restored position so that next call to + // WalkNext will go above the restored position. + // + if((ret = WalkNext()) != OK) + return ret; + } + return ret; +} + +// ***************************************************************************** +// +// Walk and collect data from the word database. +// +// If action bit HTDIG_WORDLIST_COLLECTOR is set WordReferences are +// stored in a list and the list is returned. +// If action bit HTDIG_WORDLIST_WALKER is set the <callback> function +// is called for each WordReference found. No list is built and the +// function returns a null pointer. +// +// The <searchKey> argument may be a fully qualified key, containing precise values for each +// field of the key. It may also contain only some fields of the key. In both cases +// all the word occurrences matching the fields set in the key are retrieved. It may +// be fast if key is a prefix (see WordKey::Prefix for a definition). It may +// be *slow* if key is not a prefix because it forces a complete walk of the +// index. +// +int +WordCursorOne::Walk() +{ + int ret; + if((ret = WalkInit()) != OK) return ret; + while((ret = WalkNext()) == OK) + ; + int ret1; + if((ret1 = WalkFinish()) != OK) return ret1; + + return ret == WORD_WALK_ATEND ? OK : NOTOK; +} + +int +WordCursorOne::WalkInit() +{ + ClearResult(); + ClearInternal(); + + WordReference wordRef(words->GetContext()); + + { + int ret; + if((ret = cursor->Open()) != 0) + return ret; + } + + if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: action = %d, SearchKey = %s\n", action, (char*)searchKey.Get()); + + if(action & HTDIG_WORDLIST_COLLECTOR) { + collectRes = new List; + } + + WordKey first_key(words->GetContext()); + // + // Move the cursor to start walking and do some sanity checks. + // + if(searchKey.Empty()) { + // + // Move past the stat data + // + if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is empty\n"); + + } else { + prefixKey = searchKey; + // + // If the key is a prefix, the start key is + // the longest possible prefix contained in the key. If the + // key does not contain any prefix, start from the beginning + // of the file. + // + if(prefixKey.PrefixOnly() == NOTOK) { + if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is not a prefix\n"); + prefixKey.Clear(); + } else { + if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: go to %s \n", (char*)prefixKey.Get()); + first_key = prefixKey; + } + } + + first_key.Pack(key); + // + // Allow Seek immediately after Init + // + found.Key() = first_key; + + status = OK; + searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey); + cursor_get_flags = DB_SET_RANGE; + + return OK; +} + +int +WordCursorOne::WalkRewind() +{ + WordKey first_key(words->GetContext()); + // + // Move the cursor to start walking and do some sanity checks. + // + if(searchKey.Empty()) { + first_key.Clear(); + } else { + prefixKey = searchKey; + // + // If the key is a prefix, the start key is + // the longest possible prefix contained in the key. If the + // key does not contain any prefix, start from the beginning + // of the file. + // + if(prefixKey.PrefixOnly() == NOTOK) { + prefixKey.Clear(); + first_key.Clear(); + } else { + first_key = prefixKey; + } + } + + first_key.Pack(key); + // + // Allow Seek immediately after Rewind + // + found.Key() = first_key; + + status = OK; + searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey); + cursor_get_flags = DB_SET_RANGE; + + return OK; +} + +int +WordCursorOne::WalkNext() +{ + int ret; + while((ret = WalkNextStep()) == WORD_WALK_NOMATCH_FAILED) + if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNext: got false match, retry\n"); + + return ret; +} + +int +WordCursorOne::WalkNextStep() +{ + status = OK; + + { + int error; + if((error = cursor->Get(key, data, cursor_get_flags)) != 0) { + if(error == DB_NOTFOUND) { + if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches\n", (char*)searchKey.Get()); + return (status = WORD_WALK_ATEND); + } else { + return WORD_WALK_GET_FAILED; + } + } + } + + // + // Next step operation is always sequential walk + // + cursor_get_flags = DB_NEXT; + + found.Unpack(key, data); + + if(words->Dead()->Exists(found.Key())) + return WORD_WALK_NOMATCH_FAILED; + + if(traceRes) traceRes->Add(new WordReference(found)); + + if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)found.Get()); + + // + // Don't bother to compare keys if we want to walk all the entries + // + if(!(searchKey.Empty())) { + // examples + // searchKey: aabc 1 ? ? ? + // prefixKey: aabc 1 ? ? ? + + // + // Stop loop if we reach a record whose key does not + // match prefix key requirement, provided we have a valid + // prefix key. + // (ie. stop loop if we're past last possible match...) + // + if(!prefixKey.Empty() && + !prefixKey.Equal(found.Key())) { + if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches because found a key that is greater than searchKey\n", (char*)searchKey.Get()); + return (status = WORD_WALK_ATEND); + } + + // + // Skip entries that do not exactly match the specified key. + // + if(!searchKeyIsSameAsPrefix && + !searchKey.Equal(found.Key())) { + int ret; + switch((ret = SkipUselessSequentialWalking())) { + case OK: + if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, false match jump to %s\n", (char*)searchKey.Get(), (char*)found.Get()); + return WORD_WALK_NOMATCH_FAILED; + break; + case WORD_WALK_ATEND: + if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches according to SkipUselessSequentialWalking\n", (char*)searchKey.Get()); + return (status = WORD_WALK_ATEND); + break; + default: + fprintf(stderr, "WordCursorOne::WalkNextStep: SkipUselessSequentialWalking failed %d\n", ret); + return NOTOK; + break; + } + } + } + + if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, found %s\n", (char*)searchKey.Get(), (char*)found.Get()); + + if(collectRes) { + if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: collect\n"); + collectRes->Add(new WordReference(found)); + } else if(callback) { + if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: calling callback\n"); + int ret = (*callback)(words, *cursor, &found, *(callback_data) ); + // + // The callback function tells us that something went wrong, might + // as well stop walking. + // + if(ret != OK) { + if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: callback returned NOTOK"); + return WORD_WALK_CALLBACK_FAILED|(status = WORD_WALK_ATEND); + } + } + + return OK; +} + +int +WordCursorOne::WalkFinish() +{ + if(words->verbose) fprintf(stderr, "WordCursorOne::WalkFinish\n"); + + return cursor->Close() == 0 ? OK : NOTOK; +} + +// ***************************************************************************** +// +// Helper for SkipUselessSequentialWalking. +// Undefine in foundKey all fields defined in searchKey +// so that they are not considered by SetToFollowing. +// It could become a method of WordKey but lacks generalisation and +// from what I see it is a rather specific operation. +// +static inline void complement(WordContext* context, WordKey& key, const WordKey& mask) +{ + int nfields = context->GetKeyInfo().nfields; + int i; + // + // Undefine in 'key' all fields defined in 'mask' + // + for(i = 0; i < nfields; i++) { + if(mask.IsDefined(i)) + key.Undefined(i); + else + key.SetDefined(i); + } +} + +// ***************************************************************************** +// +// Find out if we should better jump to the next possible key (DB_SET_RANGE) instead of +// sequential iterating (DB_NEXT). +// If it is decided that jump is a better move : +// cursor_set_flags = DB_SET_RANGE +// key = calculated next possible key +// Else +// do nothing +// Return values +// OK: skipping successfull. +// WORD_WALK_ATEND : no more possible match, reached the maximum +// WORD_WALK_FAILED: general failure, occurs if called and no skipping +// necessary. +// +// Sequential searching can waste time by searching all keys, for example: +// If searching for Key: argh <DEF> <UNDEF> 10 +// Under normal circonstances we would do the following +// +// DATA STATUS ACTION +// 1: argh 1 10 match DB_NEXT +// 2: argh 2 11 nomatch DB_NEXT +// 3: argh 2 15 nomatch DB_NEXT +// 4: argh 2 20 nomatch DB_NEXT +// 5: argh 2 30 nomatch DB_NEXT +// 6: argh 5 1 nomatch DB_NEXT +// 7: argh 5 8 nomatch DB_NEXT +// 8: argh 8 6 nomatch DB_NEXT +// +// But the optimal would be +// +// DATA STATUS ACTION +// 1: argh 1 10 match DB_NEXT +// 2: argh 2 11 nomatch DB_SET_RANGE argh 3 10 +// 3: argh 2 15 +// 4: argh 2 20 +// 5: argh 2 30 +// 6: argh 5 1 nomatch DB_SET_RANGE argh 5 10 +// 7: argh 5 8 +// 8: argh 8 6 nomatch DB_SET_RANGE argh 8 10 +// +// That saves a lot of unecessary hit. The underlying logic is a bit +// more complex but you have the idea. +// +int +WordCursorOne::SkipUselessSequentialWalking() +{ + WordKey& foundKey = found.Key(); + + int nfields = words->GetContext()->GetKeyInfo().nfields; + int i; + + // + // Find out how the searchKey and the foundKey differ. + // + int diff_field = 0; + int lower = 0; + if(!foundKey.Diff(searchKey, diff_field, lower)) { + // + // foundKey matches searchKey (no difference), don't + // skip, everything is fine. The caller of SkipUselessSequentialWalking + // is expected to avoid this case for efficiency. + // + return WORD_WALK_FAILED; + } + + if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)foundKey.Get()); + + // + // Undefine in foundKey all fields defined in searchKey + // so that they are not considered by SetToFollowing. + // + complement(words->GetContext(), foundKey, searchKey); + + // + // If the key found is lower than the searched key when + // considering only the fields defined in the search key, + // we only need to enforce the key to get the match. + // Otherwise we need to increment the found key to jump + // properly. + // + if(lower) { + if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: enforcing the search constraint is enough to jump forward\n"); + for(i = diff_field + 1; i < nfields; i++) + if(foundKey.IsDefined(i)) foundKey.Set(i, 0); + } else { + if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: increment the key to jump forward\n"); + // + // diff_field - 1 is not really necessary because diff_field is undefined + // in foundKey and would therefore be ignored by SetToFollowing. We write + // diff_field - 1 to clearly state that incrementing begins just before the + // field for which a difference was found. + // + int ret; + if((ret = foundKey.SetToFollowing(diff_field - 1)) != OK) + return ret; + } + + // + // Copy all fields defined in searchKey into foundKey. This will copy + // searchKey in foundKey because all these fields have been + // previously undefined in foundKey. + // + foundKey.Merge(searchKey); + + if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for %s, jump to %s\n", (char*)searchKey.Get(), (char*)foundKey.Get()); + + // + // Instruct Next function to jump to the calculated key + // + if(foundKey.Pack(key) == NOTOK) { + return WORD_WALK_FAILED; + } + cursor_get_flags = DB_SET_RANGE; + + return OK; +} + +// ***************************************************************************** +// +// Copy defined fields in patch into foundKey and +// initialize internal state so that WalkNext jumps to +// this key next time it's called. +// +// Technically this means : Override latest key found (found data member) +// with patch fields values, starting from the first field set in +// patch up to the last. Pack the result in the key field and set +// cursor_get_flags to DB_SET_RANGE. +// +int +WordCursorOne::Seek(const WordKey& patch) +{ + int nfields = words->GetContext()->GetKeyInfo().nfields; + WordKey pos = searchKey; + + if(patch.Empty()) { + fprintf(stderr, "WordCursorOne::Seek: empty patch is useless\n"); + return NOTOK; + } + + int i; + // + // Leave the most significant fields untouched + // + for(i = WORD_KEY_WORD + 1; i < nfields; i++) + if(patch.IsDefined(i)) + break; + // + // From the first value set in the patch to the end + // override. + // + for(; i < nfields; i++) { + if(patch.IsDefined(i)) + pos.Set(i, patch.Get(i)); + else + pos.Set(i, 0); + } + + if(!pos.Filled()) { + fprintf(stderr, "WordCursorOne::Seek: only make sense if the resulting key is fully defined\n"); + return NOTOK; + } + + if(words->verbose > 2) fprintf(stderr, "WordCursorOne::Seek: seek to %s\n", (char*)pos.Get()); + + // + // Next move will jump to the patched key + // + pos.Pack(key); + cursor_get_flags = DB_SET_RANGE; + + return OK; +} + +// +// Convert the whole structure to an ascii string description +// +int WordCursorOne::Get(String& bufferout) const +{ + String tmp; + bufferout.trunc(); + + searchKey.Get(tmp); + bufferout << "Input: searchKey = " << tmp << ", action = " << action << "; Output: collectRes " << (collectRes ? "set" : "not set"); + found.Get(tmp); + bufferout << ", found = " << tmp << ", status = " << status; + prefixKey.Get(tmp); + bufferout << "; Internal State: prefixKey = " << tmp << ", cursor_get_flags = " << cursor_get_flags; + + return OK; +} |