1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
|
//
// Prefix.cc
//
// Prefix: The prefix fuzzy algorithm. Performs an O(log n) search for words
// matching the *prefix* specified--thus significantly faster than a full
// substring search.
//
// Part of the ht://Dig package <http://www.htdig.org/>
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: Prefix.cc,v 1.17 2004/05/28 13:15:20 lha Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include <fcntl.h>
#include "Prefix.h"
#include "htString.h"
#include "List.h"
#include "StringMatch.h"
#include "HtConfiguration.h"
//*****************************************************************************
// Prefix::Prefix(const HtConfiguration& config_arg)
//
// Forwards the supplied configuration to the Fuzzy base constructor and
// sets this algorithm's name to "prefix".
//
Prefix::Prefix(const HtConfiguration& config_arg)
    : Fuzzy(config_arg)
{
    name = "prefix";
}
//*****************************************************************************
// Prefix::~Prefix()
//
// The destructor body is empty; no cleanup is performed here.
//
Prefix::~Prefix() {}
//*****************************************************************************
//
// Prefix search
//
// Prefix::getWords(char *w, List &words)
//
// Looks up the words in the word database that start with the prefix given
// in `w` and appends each distinct match to `words` as a newly allocated
// String.
//
//   w     - the search word.  A NULL or empty word is ignored.  Punctuation
//           is removed with HtStripPunctuation before the word is examined.
//   words - list that receives the matches (one `new String` per match).
//
// Configuration attributes read:
//   prefix_match_character - trailing marker a word must carry to be treated
//                            as a prefix search; when empty, every word is
//                            treated as a prefix.
//   minimum_prefix_length  - the word must be at least this long, not
//                            counting the trailing marker.
//   max_prefix_matches     - maximum number of words added (default 1000).
//   word_db                - word database, opened read-only.
//
// Returns without touching `words` when the word is too short, does not end
// in the marker, or the database cannot be opened.
//
void
Prefix::getWords(char *w, List &words)
{
    if (w == NULL || w[0] == '\0')
        return;

    String stripped = w;
    HtStripPunctuation(stripped);
    w = stripped.get();

    const String prefix_suffix = config["prefix_match_character"];
    int prefix_suffix_length = prefix_suffix.length();
    int minimum_prefix_length = config.Value("minimum_prefix_length");

    if (debug)
        cerr << " word=" << w << " prefix_suffix=" << prefix_suffix
             << " prefix_suffix_length=" << prefix_suffix_length
             << " minimum_prefix_length=" << minimum_prefix_length << "\n";

    // Measure the stripped word once; every later check uses this length.
    const int word_length = (int)strlen(w);

    if (word_length < minimum_prefix_length + prefix_suffix_length)
        return;

    // A zero or negative minimum_prefix_length would let a word shorter than
    // the marker through; such a word cannot end in the marker, and the
    // pointer arithmetic below would step in front of the buffer.
    if (word_length < prefix_suffix_length)
        return;

    // A null prefix character means that prefix matching should be
    // applied to every search word; otherwise return if the word does
    // not end in the prefix character(s).
    //
    if (prefix_suffix_length > 0
        && strcmp(prefix_suffix, w + word_length - prefix_suffix_length))
        return;

    HtWordList wordDB(config);
    if (wordDB.Open(config["word_db"], O_RDONLY) == NOTOK)
        return;

    int wordCount = 0;
    int maximumWords = config.Value("max_prefix_matches", 1000);
    String s;

    // Number of characters that make up the prefix proper.
    int len = word_length - prefix_suffix_length;

    // Strip the prefix character(s).  Only the prefix part is copied, clamped
    // to the buffer, so an oversized word never has real prefix characters
    // chopped off in place of the marker.
    char w2[8192];
    int copy_len = len;
    if (copy_len > (int)sizeof(w2) - 1)
        copy_len = (int)sizeof(w2) - 1;
    strncpy(w2, w, copy_len);
    w2[copy_len] = '\0';

    String w3(w2);
    w3.lowercase();

    List *wordList = wordDB.Prefix(w3.get());

    // Check the result before the first dereference, not after the loop.
    if (wordList)
    {
        WordReference *word_ref;
        String last_word;

        wordList->Start_Get();
        while (wordCount < maximumWords
               && (word_ref = (WordReference *) wordList->Get_Next()))
        {
            s = word_ref->Key().GetWord();

            // If we're somehow past the original word, we're done
            if (mystrncasecmp(s.get(), w, len))
                break;

            // If this is a duplicate word, ignore it
            if (last_word.length() != 0 && last_word == s)
                continue;

            last_word = s;
            words.Add(new String(s));
            wordCount++;
        }

        wordList->Destroy();
        delete wordList;
    }

    wordDB.Close();
}
//*****************************************************************************
// Prefix::openIndex()
//
// Performs no work and always returns 0.
//
int Prefix::openIndex()
{
    return 0;
}
//*****************************************************************************
// Prefix::generateKey(char *, String &)
//
// Empty implementation: both arguments are unnamed and ignored.
//
void Prefix::generateKey(char *, String &)
{
}
//*****************************************************************************
// Prefix::addWord(char *)
//
// Empty implementation: the argument is unnamed and ignored.
//
void Prefix::addWord(char *)
{
}
|