summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc590
1 files changed, 590 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc b/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc
new file mode 100644
index 00000000..011cfc9e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordCursorOne.cc
@@ -0,0 +1,590 @@
+//
+// WordCursorOne.cc
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordCursorOne.cc,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+
+#include "WordCursorOne.h"
+#include "WordListOne.h"
+#include "WordDead.h"
+
+#include <stdio.h>
+
+//
+// WordCursorOne implementation
+//
+
+// *****************************************************************************
+WordCursorOne::WordCursorOne(WordList *words) :
+ WordCursor(words->GetContext()),
+ prefixKey(words->GetContext())
+{
+ Clear();
+}
+
+// *****************************************************************************
+WordCursorOne::WordCursorOne(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) :
+ WordCursor(words->GetContext()),
+ prefixKey(words->GetContext())
+{
+ Clear();
+ Initialize(words, WordKey(words->GetContext()), callback, callback_data, HTDIG_WORDLIST_WALKER);
+}
+
+// *****************************************************************************
+WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) :
+ WordCursor(words->GetContext()),
+ prefixKey(words->GetContext())
+{
+ Clear();
+ Initialize(words, searchKey, 0, 0, action);
+}
+
+// *****************************************************************************
+WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) :
+ WordCursor(words->GetContext()),
+ prefixKey(words->GetContext())
+{
+ Clear();
+ Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER);
+}
+
+// *****************************************************************************
+//
+int WordCursorOne::Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object *ncallback_data, int naction)
+{
+ action = naction;
+ searchKey = nsearchKey;
+ callback = ncallback;
+ callback_data = ncallback_data;
+ words = nwords;
+ cursor = ((WordListOne*)nwords)->db->Cursor();
+ return OK;
+}
+
+// *****************************************************************************
+//
+void
+WordCursorOne::Clear()
+{
+ searchKey.Clear();
+ action = 0;
+ callback = 0;
+ callback_data = 0;
+ ClearResult();
+ ClearInternal();
+ words = 0;
+
+ //
+ // Debugging section.
+ //
+ traceRes = 0;
+}
+
+// *****************************************************************************
+//
+void
+WordCursorOne::ClearInternal()
+{
+ key.trunc();
+ data.trunc();
+ prefixKey.Clear();
+ cursor_get_flags = DB_SET_RANGE;
+ searchKeyIsSameAsPrefix = 0;
+}
+
+// *****************************************************************************
+//
+void
+WordCursorOne::ClearResult()
+{
+ collectRes = 0;
+ found.Clear();
+ status = OK;
+}
+
+int
+WordCursorOne::ContextRestore(const String& buffer)
+{
+ int ret = OK;
+ if(!buffer.empty()) {
+ WordKey key(words->GetContext(), buffer);
+ if((ret = Seek(key)) != OK)
+ return ret;
+ //
+ // Move to restored position so that next call to
+ // WalkNext will go above the restored position.
+ //
+ if((ret = WalkNext()) != OK)
+ return ret;
+ }
+ return ret;
+}
+
+// *****************************************************************************
+//
+// Walk and collect data from the word database.
+//
+// If action bit HTDIG_WORDLIST_COLLECTOR is set WordReferences are
+// stored in a list and the list is returned.
+// If action bit HTDIG_WORDLIST_WALKER is set the <callback> function
+// is called for each WordReference found. No list is built and the
+// function returns a null pointer.
+//
+// The <searchKey> argument may be a fully qualified key, containing precise values for each
+// field of the key. It may also contain only some fields of the key. In both cases
+// all the word occurrences matching the fields set in the key are retrieved. It may
+// be fast if key is a prefix (see WordKey::Prefix for a definition). It may
+// be *slow* if key is not a prefix because it forces a complete walk of the
+// index.
+//
+int
+WordCursorOne::Walk()
+{
+ int ret;
+ if((ret = WalkInit()) != OK) return ret;
+ while((ret = WalkNext()) == OK)
+ ;
+ int ret1;
+ if((ret1 = WalkFinish()) != OK) return ret1;
+
+ return ret == WORD_WALK_ATEND ? OK : NOTOK;
+}
+
+int
+WordCursorOne::WalkInit()
+{
+ ClearResult();
+ ClearInternal();
+
+ WordReference wordRef(words->GetContext());
+
+ {
+ int ret;
+ if((ret = cursor->Open()) != 0)
+ return ret;
+ }
+
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: action = %d, SearchKey = %s\n", action, (char*)searchKey.Get());
+
+ if(action & HTDIG_WORDLIST_COLLECTOR) {
+ collectRes = new List;
+ }
+
+ WordKey first_key(words->GetContext());
+ //
+ // Move the cursor to start walking and do some sanity checks.
+ //
+ if(searchKey.Empty()) {
+ //
+ // Move past the stat data
+ //
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is empty\n");
+
+ } else {
+ prefixKey = searchKey;
+ //
+ // If the key is a prefix, the start key is
+ // the longest possible prefix contained in the key. If the
+ // key does not contain any prefix, start from the beginning
+ // of the file.
+ //
+ if(prefixKey.PrefixOnly() == NOTOK) {
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is not a prefix\n");
+ prefixKey.Clear();
+ } else {
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: go to %s \n", (char*)prefixKey.Get());
+ first_key = prefixKey;
+ }
+ }
+
+ first_key.Pack(key);
+ //
+ // Allow Seek immediately after Init
+ //
+ found.Key() = first_key;
+
+ status = OK;
+ searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+int
+WordCursorOne::WalkRewind()
+{
+ WordKey first_key(words->GetContext());
+ //
+ // Move the cursor to start walking and do some sanity checks.
+ //
+ if(searchKey.Empty()) {
+ first_key.Clear();
+ } else {
+ prefixKey = searchKey;
+ //
+ // If the key is a prefix, the start key is
+ // the longest possible prefix contained in the key. If the
+ // key does not contain any prefix, start from the beginning
+ // of the file.
+ //
+ if(prefixKey.PrefixOnly() == NOTOK) {
+ prefixKey.Clear();
+ first_key.Clear();
+ } else {
+ first_key = prefixKey;
+ }
+ }
+
+ first_key.Pack(key);
+ //
+ // Allow Seek immediately after Rewind
+ //
+ found.Key() = first_key;
+
+ status = OK;
+ searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+int
+WordCursorOne::WalkNext()
+{
+ int ret;
+ while((ret = WalkNextStep()) == WORD_WALK_NOMATCH_FAILED)
+ if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNext: got false match, retry\n");
+
+ return ret;
+}
+
+int
+WordCursorOne::WalkNextStep()
+{
+ status = OK;
+
+ {
+ int error;
+ if((error = cursor->Get(key, data, cursor_get_flags)) != 0) {
+ if(error == DB_NOTFOUND) {
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ } else {
+ return WORD_WALK_GET_FAILED;
+ }
+ }
+ }
+
+ //
+ // Next step operation is always sequential walk
+ //
+ cursor_get_flags = DB_NEXT;
+
+ found.Unpack(key, data);
+
+ if(words->Dead()->Exists(found.Key()))
+ return WORD_WALK_NOMATCH_FAILED;
+
+ if(traceRes) traceRes->Add(new WordReference(found));
+
+ if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)found.Get());
+
+ //
+ // Don't bother to compare keys if we want to walk all the entries
+ //
+ if(!(searchKey.Empty())) {
+ // examples
+ // searchKey: aabc 1 ? ? ?
+ // prefixKey: aabc 1 ? ? ?
+
+ //
+ // Stop loop if we reach a record whose key does not
+ // match prefix key requirement, provided we have a valid
+ // prefix key.
+ // (ie. stop loop if we're past last possible match...)
+ //
+ if(!prefixKey.Empty() &&
+ !prefixKey.Equal(found.Key())) {
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches because found a key that is greater than searchKey\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ }
+
+ //
+ // Skip entries that do not exactly match the specified key.
+ //
+ if(!searchKeyIsSameAsPrefix &&
+ !searchKey.Equal(found.Key())) {
+ int ret;
+ switch((ret = SkipUselessSequentialWalking())) {
+ case OK:
+ if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, false match jump to %s\n", (char*)searchKey.Get(), (char*)found.Get());
+ return WORD_WALK_NOMATCH_FAILED;
+ break;
+ case WORD_WALK_ATEND:
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches according to SkipUselessSequentialWalking\n", (char*)searchKey.Get());
+ return (status = WORD_WALK_ATEND);
+ break;
+ default:
+ fprintf(stderr, "WordCursorOne::WalkNextStep: SkipUselessSequentialWalking failed %d\n", ret);
+ return NOTOK;
+ break;
+ }
+ }
+ }
+
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, found %s\n", (char*)searchKey.Get(), (char*)found.Get());
+
+ if(collectRes) {
+ if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: collect\n");
+ collectRes->Add(new WordReference(found));
+ } else if(callback) {
+ if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: calling callback\n");
+ int ret = (*callback)(words, *cursor, &found, *(callback_data) );
+ //
+ // The callback function tells us that something went wrong, might
+ // as well stop walking.
+ //
+ if(ret != OK) {
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: callback returned NOTOK");
+ return WORD_WALK_CALLBACK_FAILED|(status = WORD_WALK_ATEND);
+ }
+ }
+
+ return OK;
+}
+
+int
+WordCursorOne::WalkFinish()
+{
+ if(words->verbose) fprintf(stderr, "WordCursorOne::WalkFinish\n");
+
+ return cursor->Close() == 0 ? OK : NOTOK;
+}
+
+// *****************************************************************************
+//
+// Helper for SkipUselessSequentialWalking.
+// Undefine in foundKey all fields defined in searchKey
+// so that they are not considered by SetToFollowing.
+// It could become a method of WordKey but lacks generalisation and
+// from what I see it is a rather specific operation.
+//
+static inline void complement(WordContext* context, WordKey& key, const WordKey& mask)
+{
+ int nfields = context->GetKeyInfo().nfields;
+ int i;
+ //
+ // Undefine in 'key' all fields defined in 'mask'
+ //
+ for(i = 0; i < nfields; i++) {
+ if(mask.IsDefined(i))
+ key.Undefined(i);
+ else
+ key.SetDefined(i);
+ }
+}
+
+// *****************************************************************************
+//
+// Find out if we should better jump to the next possible key (DB_SET_RANGE) instead of
+// sequential iterating (DB_NEXT).
+// If it is decided that jump is a better move :
+// cursor_set_flags = DB_SET_RANGE
+// key = calculated next possible key
+// Else
+// do nothing
+// Return values
+// OK: skipping successfull.
+// WORD_WALK_ATEND : no more possible match, reached the maximum
+// WORD_WALK_FAILED: general failure, occurs if called and no skipping
+// necessary.
+//
+// Sequential searching can waste time by searching all keys, for example:
+// If searching for Key: argh <DEF> <UNDEF> 10
+// Under normal circonstances we would do the following
+//
+// DATA STATUS ACTION
+// 1: argh 1 10 match DB_NEXT
+// 2: argh 2 11 nomatch DB_NEXT
+// 3: argh 2 15 nomatch DB_NEXT
+// 4: argh 2 20 nomatch DB_NEXT
+// 5: argh 2 30 nomatch DB_NEXT
+// 6: argh 5 1 nomatch DB_NEXT
+// 7: argh 5 8 nomatch DB_NEXT
+// 8: argh 8 6 nomatch DB_NEXT
+//
+// But the optimal would be
+//
+// DATA STATUS ACTION
+// 1: argh 1 10 match DB_NEXT
+// 2: argh 2 11 nomatch DB_SET_RANGE argh 3 10
+// 3: argh 2 15
+// 4: argh 2 20
+// 5: argh 2 30
+// 6: argh 5 1 nomatch DB_SET_RANGE argh 5 10
+// 7: argh 5 8
+// 8: argh 8 6 nomatch DB_SET_RANGE argh 8 10
+//
+// That saves a lot of unecessary hit. The underlying logic is a bit
+// more complex but you have the idea.
+//
+int
+WordCursorOne::SkipUselessSequentialWalking()
+{
+ WordKey& foundKey = found.Key();
+
+ int nfields = words->GetContext()->GetKeyInfo().nfields;
+ int i;
+
+ //
+ // Find out how the searchKey and the foundKey differ.
+ //
+ int diff_field = 0;
+ int lower = 0;
+ if(!foundKey.Diff(searchKey, diff_field, lower)) {
+ //
+ // foundKey matches searchKey (no difference), don't
+ // skip, everything is fine. The caller of SkipUselessSequentialWalking
+ // is expected to avoid this case for efficiency.
+ //
+ return WORD_WALK_FAILED;
+ }
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
+
+ //
+ // Undefine in foundKey all fields defined in searchKey
+ // so that they are not considered by SetToFollowing.
+ //
+ complement(words->GetContext(), foundKey, searchKey);
+
+ //
+ // If the key found is lower than the searched key when
+ // considering only the fields defined in the search key,
+ // we only need to enforce the key to get the match.
+ // Otherwise we need to increment the found key to jump
+ // properly.
+ //
+ if(lower) {
+ if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: enforcing the search constraint is enough to jump forward\n");
+ for(i = diff_field + 1; i < nfields; i++)
+ if(foundKey.IsDefined(i)) foundKey.Set(i, 0);
+ } else {
+ if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: increment the key to jump forward\n");
+ //
+ // diff_field - 1 is not really necessary because diff_field is undefined
+ // in foundKey and would therefore be ignored by SetToFollowing. We write
+ // diff_field - 1 to clearly state that incrementing begins just before the
+ // field for which a difference was found.
+ //
+ int ret;
+ if((ret = foundKey.SetToFollowing(diff_field - 1)) != OK)
+ return ret;
+ }
+
+ //
+ // Copy all fields defined in searchKey into foundKey. This will copy
+ // searchKey in foundKey because all these fields have been
+ // previously undefined in foundKey.
+ //
+ foundKey.Merge(searchKey);
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for %s, jump to %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
+
+ //
+ // Instruct Next function to jump to the calculated key
+ //
+ if(foundKey.Pack(key) == NOTOK) {
+ return WORD_WALK_FAILED;
+ }
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+// *****************************************************************************
+//
+// Copy defined fields in patch into foundKey and
+// initialize internal state so that WalkNext jumps to
+// this key next time it's called.
+//
+// Technically this means : Override latest key found (found data member)
+// with patch fields values, starting from the first field set in
+// patch up to the last. Pack the result in the key field and set
+// cursor_get_flags to DB_SET_RANGE.
+//
+int
+WordCursorOne::Seek(const WordKey& patch)
+{
+ int nfields = words->GetContext()->GetKeyInfo().nfields;
+ WordKey pos = searchKey;
+
+ if(patch.Empty()) {
+ fprintf(stderr, "WordCursorOne::Seek: empty patch is useless\n");
+ return NOTOK;
+ }
+
+ int i;
+ //
+ // Leave the most significant fields untouched
+ //
+ for(i = WORD_KEY_WORD + 1; i < nfields; i++)
+ if(patch.IsDefined(i))
+ break;
+ //
+ // From the first value set in the patch to the end
+ // override.
+ //
+ for(; i < nfields; i++) {
+ if(patch.IsDefined(i))
+ pos.Set(i, patch.Get(i));
+ else
+ pos.Set(i, 0);
+ }
+
+ if(!pos.Filled()) {
+ fprintf(stderr, "WordCursorOne::Seek: only make sense if the resulting key is fully defined\n");
+ return NOTOK;
+ }
+
+ if(words->verbose > 2) fprintf(stderr, "WordCursorOne::Seek: seek to %s\n", (char*)pos.Get());
+
+ //
+ // Next move will jump to the patched key
+ //
+ pos.Pack(key);
+ cursor_get_flags = DB_SET_RANGE;
+
+ return OK;
+}
+
+//
+// Convert the whole structure to an ascii string description
+//
+int WordCursorOne::Get(String& bufferout) const
+{
+ String tmp;
+ bufferout.trunc();
+
+ searchKey.Get(tmp);
+ bufferout << "Input: searchKey = " << tmp << ", action = " << action << "; Output: collectRes " << (collectRes ? "set" : "not set");
+ found.Get(tmp);
+ bufferout << ", found = " << tmp << ", status = " << status;
+ prefixKey.Get(tmp);
+ bufferout << "; Internal State: prefixKey = " << tmp << ", cursor_get_flags = " << cursor_get_flags;
+
+ return OK;
+}