diff options
Diffstat (limited to 'tdecore/tdesycocadict.cpp')
-rw-r--r-- | tdecore/tdesycocadict.cpp | 454 |
1 files changed, 454 insertions, 0 deletions
diff --git a/tdecore/tdesycocadict.cpp b/tdecore/tdesycocadict.cpp new file mode 100644 index 000000000..702c163a7 --- /dev/null +++ b/tdecore/tdesycocadict.cpp @@ -0,0 +1,454 @@ +/* This file is part of the KDE libraries + * Copyright (C) 1999 Waldo Bastian <bastian@kde.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License version 2 as published by the Free Software Foundation; + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + **/ + +#include "tdesycocadict.h" +#include "tdesycocaentry.h" +#include "tdesycoca.h" + +#include <tqptrlist.h> +#include <tqvaluelist.h> +#include <kdebug.h> +#include <stdlib.h> + +namespace +{ +struct string_entry { + string_entry(TQString _key, KSycocaEntry *_payload) + { keyStr = _key; key = keyStr.unicode(); length = keyStr.length(); payload = _payload; hash = 0; } + uint hash; + int length; + const TQChar *key; + TQString keyStr; + KSycocaEntry *payload; +}; +} + +template class TQPtrList<string_entry>; + +class KSycocaDictStringList : public TQPtrList<string_entry> +{ +public: + KSycocaDictStringList(); +}; + +KSycocaDictStringList::KSycocaDictStringList() +{ + setAutoDelete(true); +} + +KSycocaDict::KSycocaDict() + : d(0), mStr(0), mOffset(0) +{ +} + +KSycocaDict::KSycocaDict(TQDataStream *str, int offset) + : d(0), mStr(str), mOffset(offset) +{ + TQ_UINT32 test1, test2; + str->device()->at(offset); + (*str) >> test1 >> test2; + if ((test1 > 0x000fffff) || (test2 > 1024)) + { + KSycoca::flagError(); + mHashTableSize = 0; + mOffset = 0; + return; + } + + str->device()->at(offset); + (*str) >> mHashTableSize; + (*str) >> mHashList; + mOffset = str->device()->at(); // Start of hashtable +} + +KSycocaDict::~KSycocaDict() +{ + delete d; +} + +void +KSycocaDict::add(const TQString &key, KSycocaEntry *payload) +{ + if (key.isEmpty()) return; // Not allowed (should never happen) + if (!payload) return; // Not allowed! + if (!d) + { + d = new KSycocaDictStringList(); + } + + string_entry *entry= new string_entry(key, payload); + d->append(entry); +} + +void +KSycocaDict::remove(const TQString &key) +{ + if (d) + { + for(string_entry *entry=d->first(); entry; entry = d->next()) + { + if (entry->keyStr == key) + { + d->remove(); + break; + } + } + } +} + +int +KSycocaDict::find_string(const TQString &key ) +{ + //kdDebug(7011) << TQString("KSycocaDict::find_string(%1)").arg(key) << endl; + + if (!mStr || !mOffset) + { + kdError(7011) << "No database available!" << endl; + return 0; + } + + if (mHashTableSize == 0) + return 0; // Unlikely to find anything :-] + + // Read hash-table data + uint hash = hashKey(key) % mHashTableSize; + //kdDebug(7011) << TQString("hash is %1").arg(hash) << endl; + + uint off = mOffset+sizeof(TQ_INT32)*hash; + //kdDebug(7011) << TQString("off is %1").arg(off,8,16) << endl; + mStr->device()->at( off ); + + TQ_INT32 offset; + (*mStr) >> offset; + + //kdDebug(7011) << TQString("offset is %1").arg(offset,8,16) << endl; + if (offset == 0) + return 0; + + if (offset > 0) + return offset; // Positive ID + + // Lookup duplicate list. + offset = -offset; + + mStr->device()->at(offset); + //kdDebug(7011) << TQString("Looking up duplicate list at %1").arg(offset,8,16) << endl; + + while(true) + { + (*mStr) >> offset; + if (offset == 0) break; + TQString dupkey; + (*mStr) >> dupkey; + //kdDebug(7011) << TQString(">> %1 %2").arg(offset,8,16).arg(dupkey) << endl; + if (dupkey == key) return offset; + } + //kdWarning(7011) << "Not found!" << endl; + + return 0; +} + +uint +KSycocaDict::count() +{ + if (!d) return 0; + + return d->count(); +} + +void +KSycocaDict::clear() +{ + delete d; + d = 0; +} + +uint +KSycocaDict::hashKey( const TQString &key) +{ + int l = key.length(); + register uint h = 0; + + for(uint i = 0; i < mHashList.count(); i++) + { + int pos = mHashList[i]; + if (pos < 0) + { + pos = -pos-1; + if (pos < l) + h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff; + } + else + { + pos = pos-1; + if (pos < l) + h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff; + } + } + return h; +} + +// +// Calculate the diversity of the strings at position 'pos' +static int +calcDiversity(KSycocaDictStringList *d, int pos, int sz) +{ + if (pos == 0) return 0; + bool *matrix = (bool *) calloc(sz, sizeof(bool)); + uint usz = sz; + + if (pos < 0) + { + pos = -pos-1; + for(string_entry *entry=d->first(); entry; entry = d->next()) + { + register int l = entry->length; + if (pos < l && pos != 0) + { + register uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff; + matrix[ hash % usz ] = true; + } + } + } + else + { + pos = pos-1; + for(string_entry *entry=d->first(); entry; entry = d->next()) + { + if (pos < entry->length) + { + register uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff; + matrix[ hash % usz ] = true; + } + } + } + int diversity = 0; + for(int i=0;i< sz;i++) + if (matrix[i]) diversity++; + + free((void *) matrix); + + return diversity; +} + +// +// Add the diversity of the strings at position 'pos' +static void +addDiversity(KSycocaDictStringList *d, int pos) +{ + if (pos == 0) return; + if (pos < 0) + { + pos = -pos-1; + for(string_entry *entry=d->first(); entry; entry = d->next()) + { + register int l = entry->length; + if (pos < l) + entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff; + } + } + else + { + pos = pos - 1; + for(string_entry *entry=d->first(); entry; entry = d->next()) + { + if (pos < entry->length) + entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff; + } + } +} + + +void +KSycocaDict::save(TQDataStream &str) +{ + if (count() == 0) + { + mHashTableSize = 0; + mHashList.clear(); + str << mHashTableSize; + str << mHashList; + return; + } + + mOffset = str.device()->at(); + + //kdDebug(7011) << TQString("KSycocaDict: %1 entries.").arg(count()) << endl; + + //kdDebug(7011) << "Calculating hash keys.." << endl; + + int maxLength = 0; + //kdDebug(7011) << "Finding maximum string length" << endl; + for(string_entry *entry=d->first(); entry; entry = d->next()) + { + entry->hash = 0; + if (entry->length > maxLength) + maxLength = entry->length; + } + + //kdDebug(7011) << TQString("Max string length = %1").arg(maxLength) << endl; + + // use "almost prime" number for sz (to calculate diversity) and later + // for the table size of big tables + // int sz = d->count()*5-1; + register unsigned int sz = count()*4 + 1; + while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13)))) sz+=2; + + int maxDiv = 0; + int maxPos = 0; + int lastDiv = 0; + + mHashList.clear(); + + // try to limit diversity scan by "predicting" positions + // with high diversity + int *oldvec=new int[maxLength*2+1]; + for (int i=0; i<(maxLength*2+1); i++) oldvec[i]=0; + int mindiv=0; + + while(true) + { + int divsum=0,divnum=0; + + maxDiv = 0; + maxPos = 0; + for(int pos=-maxLength; pos <= maxLength; pos++) + { + // cut off + if (oldvec[pos+maxLength]<mindiv) + { oldvec[pos+maxLength]=0; continue; } + + int diversity = calcDiversity(d, pos, sz); + if (diversity > maxDiv) + { + maxDiv = diversity; + maxPos = pos; + } + oldvec[pos+maxLength]=diversity; + divsum+=diversity; divnum++; + } + // arbitrary cut-off value 3/4 of average seems to work + if (divnum) + mindiv=(3*divsum)/(4*divnum); + + if (maxDiv <= lastDiv) + break; + // tqWarning("Max Div = %d at pos %d", maxDiv, maxPos); + lastDiv = maxDiv; + addDiversity(d, maxPos); + mHashList.append(maxPos); + } + + delete [] oldvec; + + for(string_entry *entry=d->first(); entry; entry = d->next()) + { + entry->hash = hashKey(entry->keyStr); + } +// fprintf(stderr, "Calculating minimum table size..\n"); + + mHashTableSize = sz; + + struct hashtable_entry { + string_entry *entry; + TQPtrList<string_entry> *duplicates; + int duplicate_offset; + }; + + hashtable_entry *hashTable = new hashtable_entry[ sz ]; + + //kdDebug(7011) << "Clearing hashtable..." << endl; + for (unsigned int i=0; i < sz; i++) + { + hashTable[i].entry = 0; + hashTable[i].duplicates = 0; + } + + //kdDebug(7011) << "Filling hashtable..." << endl; + for(string_entry *entry=d->first(); entry; entry = d->next()) + { + int hash = entry->hash % sz; + if (!hashTable[hash].entry) + { // First entry + hashTable[hash].entry = entry; + } + else + { + if (!hashTable[hash].duplicates) + { // Second entry, build duplicate list. + hashTable[hash].duplicates = new TQPtrList<string_entry>(); + hashTable[hash].duplicates->append(hashTable[hash].entry); + hashTable[hash].duplicate_offset = 0; + } + hashTable[hash].duplicates->append(entry); + } + } + + str << mHashTableSize; + str << mHashList; + + mOffset = str.device()->at(); // mOffset points to start of hashTable + //kdDebug(7011) << TQString("Start of Hash Table, offset = %1").arg(mOffset,8,16) << endl; + + for(int pass = 1; pass <= 2; pass++) + { + str.device()->at(mOffset); + //kdDebug(7011) << TQString("Writing hash table (pass #%1)").arg(pass) << endl; + for(uint i=0; i < mHashTableSize; i++) + { + TQ_INT32 tmpid; + if (!hashTable[i].entry) + tmpid = (TQ_INT32) 0; + else if (!hashTable[i].duplicates) + tmpid = (TQ_INT32) hashTable[i].entry->payload->offset(); // Positive ID + else + tmpid = (TQ_INT32) -hashTable[i].duplicate_offset; // Negative ID + str << tmpid; + //kdDebug(7011) << TQString("Hash table : %1").arg(tmpid,8,16) << endl; + } + //kdDebug(7011) << TQString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16) << endl; + + //kdDebug(7011) << TQString("Writing duplicate lists (pass #%1)").arg(pass) << endl; + for(uint i=0; i < mHashTableSize; i++) + { + if (hashTable[i].duplicates) + { + TQPtrList<string_entry> *dups = hashTable[i].duplicates; + hashTable[i].duplicate_offset = str.device()->at(); + + /*kdDebug(7011) << TQString("Duplicate lists: Offset = %1 list_size = %2") .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count()) << endl; +*/ + for(string_entry *dup = dups->first(); dup; dup=dups->next()) + { + str << (TQ_INT32) dup->payload->offset(); // Positive ID + str << dup->keyStr; // Key (TQString) + } + str << (TQ_INT32) 0; // End of list marker (0) + } + } + //kdDebug(7011) << TQString("End of Dict, offset = %1").arg(str.device()->at(),8,16) << endl; + } + + //kdDebug(7011) << "Cleaning up hash table." << endl; + for(uint i=0; i < mHashTableSize; i++) + { + delete hashTable[i].duplicates; + } + delete [] hashTable; +} + |