diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordDBCache.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htword/WordDBCache.cc | 411 |
1 files changed, 411 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBCache.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDBCache.cc new file mode 100644 index 00000000..2f7a988a --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBCache.cc @@ -0,0 +1,411 @@ +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <stdio.h> + +#ifndef _MSC_VER /* _WIN32 */ +#include <unistd.h> +#endif + +#include <stdlib.h> +#include <sys/stat.h> + +#include "WordKey.h" +#include "WordDB.h" +#include "WordDBCache.h" +#include "WordMeta.h" +#include "ber.h" + +int WordDBCaches::Add(char* key, int key_size, char* data, int data_size) +{ + int ret; + if((ret = cache.Allocate(key_size + data_size)) == ENOMEM) { + if((ret = CacheFlush()) != 0) return ret; + if((ret = cache.Allocate(key_size + data_size))) return ret; + } + + return cache.Add(key, key_size, data, data_size); +} + +int WordDBCaches::AddFile(String& filename) +{ + char tmp[32]; + unsigned int serial; + words->Meta()->Serial(WORD_META_SERIAL_FILE, serial); + if(serial == WORD_META_SERIAL_INVALID) + return NOTOK; + filename = words->Filename(); + sprintf(tmp, "C%08d", serial - 1); + filename << tmp; + + String dummy; + if(files->Put(0, filename, dummy, 0) != 0) + return NOTOK; + + size = (cache.GetMax() / 1024) * serial; + + return OK; +} + +int WordDBCaches::CacheFlush() +{ + if(cache.Empty()) return OK; + + if(cache.Sort() != OK) return NOTOK; + String filename; + int locking = 0; + if(!lock) { + words->Meta()->Lock("cache", lock); + locking = 1; + } + if(AddFile(filename) != OK) return NOTOK; + if(CacheWrite(filename) != OK) return NOTOK; + + unsigned int serial; + words->Meta()->GetSerial(WORD_META_SERIAL_FILE, serial); + if(serial >= (unsigned int)file_max || Full()) + if(Merge() != OK) return NOTOK; + if(locking) words->Meta()->Unlock("cache", lock); + + return OK; +} + +static int merge_cmp_size(WordDBCaches* , WordDBCacheFile* a, WordDBCacheFile* b) +{ + return b->size - a->size; +} + +int WordDBCaches::Merge() +{ + if(CacheFlush() != OK) return NOTOK; + + int locking = 0; + if(!lock) { + words->Meta()->Lock("cache", lock); + locking = 1; + } + unsigned int serial; + words->Meta()->GetSerial(WORD_META_SERIAL_FILE, serial); + if(serial <= 1) return OK; + + // + // heap lists all the files in decreasing size order (biggest first) + // + WordDBCacheFile* heap = new WordDBCacheFile[serial]; + { + String filename; + String dummy; + WordDBCursor* cursor = files->Cursor(); + struct stat stat_buf; + int i; + int ret; + for(i = 0; (ret = cursor->Get(filename, dummy, DB_NEXT)) == 0; i++) { + WordDBCacheFile& file = heap[i]; + file.filename = filename; + if(stat((char*)file.filename, &stat_buf) == 0) { + file.size = stat_buf.st_size; + } else { + const String message = String("WordDBCaches::Merge: cannot stat ") + file.filename; + perror((const char*)message); + return NOTOK; + } + cursor->Del(); + } + delete cursor; + myqsort((void*)heap, serial, sizeof(WordDBCacheFile), (myqsort_cmp)merge_cmp_size, (void*)this); + } + + String tmpname = words->Filename() + String("C.tmp"); + + while(serial > 1) { + WordDBCacheFile* a = &heap[serial - 1]; + WordDBCacheFile* b = &heap[serial - 2]; + + if(Merge(a->filename, b->filename, tmpname) != OK) return NOTOK; + + // + // Remove file a + // + if(unlink((char*)a->filename) != 0) { + const String message = String("WordDBCaches::Merge: unlink ") + a->filename; + perror((const char*)message); + return NOTOK; + } + + // + // Remove file b + // + if(unlink((char*)b->filename) != 0) { + const String message = String("WordDBCaches::Merge: unlink ") + b->filename; + perror((const char*)message); + return NOTOK; + } + + // + // Rename tmp file into file b + // + if(rename((char*)tmpname, (char*)b->filename) != 0) { + const String message = String("WordDBCaches::Merge: rename ") + tmpname + String(" ") + b->filename; + perror((const char*)message); + return NOTOK; + } + + // + // Update b file size. The size need not be accurate number as long + // as it reflects the relative size of each file. + // + b->size += a->size; + + serial--; + // + // update heap + // + myqsort((void*)heap, serial, sizeof(WordDBCacheFile), (myqsort_cmp)merge_cmp_size, (void*)this); + } + + { + String newname(words->Filename()); + newname << "C00000000"; + + if(rename((char*)heap[0].filename, (char*)newname) != 0) { + const String message = String("WordDBCaches::Merge: rename ") + heap[0].filename + String(" ") + newname; + perror((const char*)message); + return NOTOK; + } + + String dummy; + if(files->Put(0, newname, dummy, 0) != 0) + return NOTOK; + words->Meta()->SetSerial(WORD_META_SERIAL_FILE, serial); + } + if(locking) words->Meta()->Unlock("cache", lock); + + return OK; +} + +int WordDBCaches::Merge(const String& filea, const String& fileb, const String& tmpname) +{ + FILE* ftmp = fopen((const char*)tmpname, "w"); + FILE* fa = fopen((const char*)filea, "r"); + FILE* fb = fopen((const char*)fileb, "r"); + + unsigned int buffertmp_size = 128; + unsigned char* buffertmp = (unsigned char*)malloc(buffertmp_size); + unsigned int buffera_size = 128; + unsigned char* buffera = (unsigned char*)malloc(buffera_size); + unsigned int bufferb_size = 128; + unsigned char* bufferb = (unsigned char*)malloc(bufferb_size); + + unsigned int entriesa_length; + if(ber_file2value(fa, entriesa_length) < 1) return NOTOK; + unsigned int entriesb_length; + if(ber_file2value(fb, entriesb_length) < 1) return NOTOK; + + if(ber_value2file(ftmp, entriesa_length + entriesb_length) < 1) return NOTOK; + + WordDBCacheEntry entrya; + WordDBCacheEntry entryb; + + if(entriesa_length > 0 && entriesb_length > 0) { + + if(ReadEntry(fa, entrya, buffera, buffera_size) != OK) return NOTOK; + if(ReadEntry(fb, entryb, bufferb, bufferb_size) != OK) return NOTOK; + + while(entriesa_length > 0 && entriesb_length > 0) { + if(WordKey::Compare(words->GetContext(), (const unsigned char*)entrya.key, entrya.key_size, (const unsigned char*)entryb.key, entryb.key_size) < 0) { + if(WriteEntry(ftmp, entrya, buffertmp, buffertmp_size) != OK) return NOTOK; + if(--entriesa_length > 0) + if(ReadEntry(fa, entrya, buffera, buffera_size) != OK) return NOTOK; + } else { + if(WriteEntry(ftmp, entryb, buffertmp, buffertmp_size) != OK) return NOTOK; + if(--entriesb_length > 0) + if(ReadEntry(fb, entryb, bufferb, bufferb_size) != OK) return NOTOK; + } + } + } + + if(entriesa_length > 0 || entriesb_length > 0) { + FILE* fp = entriesa_length > 0 ? fa : fb; + unsigned int& entries_length = entriesa_length > 0 ? entriesa_length : entriesb_length; + WordDBCacheEntry& entry = entriesa_length > 0 ? entrya : entryb; + while(entries_length > 0) { + if(WriteEntry(ftmp, entry, buffertmp, buffertmp_size) != OK) return NOTOK; + if(--entries_length > 0) + if(ReadEntry(fp, entry, buffera, buffera_size) != OK) return NOTOK; + } + } + + free(buffera); + free(bufferb); + free(buffertmp); + + fclose(fa); + fclose(fb); + fclose(ftmp); + + return OK; +} + +int WordDBCaches::Merge(WordDB& db) +{ + int locking = 0; + if(!lock) { + words->Meta()->Lock("cache", lock); + locking = 1; + } + if(Merge() != OK) return NOTOK; + + String filename; + String dummy; + WordDBCursor* cursor = files->Cursor(); + if(cursor->Get(filename, dummy, DB_FIRST) != 0) { + delete cursor; + return NOTOK; + } + cursor->Del(); + delete cursor; + + FILE* fp = fopen((char*)filename, "r"); + + unsigned int buffer_size = 128; + unsigned char* buffer = (unsigned char*)malloc(buffer_size); + + unsigned int entries_length; + if(ber_file2value(fp, entries_length) < 1) return NOTOK; + + WordDBCacheEntry entry; + + unsigned int i; + for(i = 0; i < entries_length; i++) { + if(ReadEntry(fp, entry, buffer, buffer_size) != OK) return NOTOK; + void* user_data = words->GetContext(); + WORD_DBT_INIT(rkey, (void*)entry.key, entry.key_size); + WORD_DBT_INIT(rdata, (void*)entry.data, entry.data_size); + db.db->put(db.db, 0, &rkey, &rdata, 0); + } + + if(unlink((char*)filename) != 0) { + const String message = String("WordDBCaches::Merge: unlink ") + filename; + perror((const char*)message); + return NOTOK; + } + + words->Meta()->SetSerial(WORD_META_SERIAL_FILE, 0); + if(locking) words->Meta()->Unlock("cache", lock); + size = 0; + free(buffer); + fclose(fp); + + return OK; +} + +int WordDBCaches::CacheWrite(const String& filename) +{ + FILE* fp = fopen(filename, "w"); + if(!fp) { + String message; + message << "WordDBCaches::CacheWrite()" << filename << "): "; + perror((char*)message); + return NOTOK; + } + + int entries_length; + WordDBCacheEntry* entries; + int ret; + if((ret = cache.Entries(entries, entries_length)) != 0) + return ret; + + if(ber_value2file(fp, entries_length) < 1) return NOTOK; + + unsigned int buffer_size = 1024; + unsigned char* buffer = (unsigned char*)malloc(buffer_size); + int i; + for(i = 0; i < entries_length; i++) { + if(WriteEntry(fp, entries[i], buffer, buffer_size) != OK) return NOTOK; + } + free(buffer); + fclose(fp); + + cache.Flush(); + + return OK; +} + +int WordDBCaches::WriteEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size) +{ + if(entry.key_size + entry.data_size + 64 > buffer_size) { + buffer_size = entry.key_size + entry.data_size + 64; + buffer = (unsigned char*)realloc(buffer, buffer_size); + } + + int p_size = buffer_size; + unsigned char* p = buffer; + + int ber_len; + if((ber_len = ber_value2buf(p, p_size, entry.key_size)) < 1) { + fprintf(stderr, "WordDBCaches::WriteEntry: BER failed for key %d\n", entry.key_size); + return NOTOK; + } + p += ber_len; + memcpy(p, entry.key, entry.key_size); + p += entry.key_size; + + p_size -= ber_len + entry.key_size; + + if((ber_len = ber_value2buf(p, p_size, entry.data_size)) < 1) { + fprintf(stderr, "WordDBCaches::WriteEntry: BER failed for data %d\n", entry.data_size); + return NOTOK; + } + p += ber_len; + memcpy(p, entry.data, entry.data_size); + p += entry.data_size; + + if(fwrite((void*)buffer, p - buffer, 1, fp) != 1) { + perror("WordDBCaches::WriteEntry: cannot write entry "); + return NOTOK; + } + + return OK; +} + +int WordDBCaches::ReadEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size) +{ + if(ber_file2value(fp, entry.key_size) < 1) return NOTOK; + + if(entry.key_size > buffer_size) { + buffer_size += entry.key_size; + if(!(buffer = (unsigned char*)realloc(buffer, buffer_size))) return NOTOK; + } + + if(fread((void*)buffer, entry.key_size, 1, fp) != 1) { + perror("WordDBCaches::ReadEntry(): cannot read key entry "); + return NOTOK; + } + + if(ber_file2value(fp, entry.data_size) < 1) return NOTOK; + + if(entry.data_size > 0) { + if(entry.data_size + entry.key_size > buffer_size) { + buffer_size += entry.data_size; + if(!(buffer = (unsigned char*)realloc(buffer, buffer_size))) return NOTOK; + } + + if(fread((void*)(buffer + entry.key_size), entry.data_size, 1, fp) != 1) { + perror("WordDBCaches::ReadEntry(): cannot read data entry "); + return NOTOK; + } + } + + entry.key = (char*)buffer; + entry.data = (char*)(buffer + entry.key_size); + + return OK; +} |