blob: d3ef4b713422f17458c336e46f91f9ba3f8d4e0b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
//
// Regexp.cc
//
// Regexp: A fuzzy-matching algorithm that matches an input regular
//         expression against the words in the word database.
//         Based on the Substring fuzzy algorithm.
//
// Part of the ht://Dig package <http://www.htdig.org/>
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: Regexp.cc,v 1.5 2004/05/28 13:15:20 lha Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include <fcntl.h>
#include "Regexp.h"
#include "htString.h"
#include "List.h"
#include "StringMatch.h"
#include "HtConfiguration.h"
//*****************************************************************************
// Regexp::Regexp(const HtConfiguration& config_arg)
//   Forwards the configuration to the Fuzzy base class and sets this
//   algorithm's name to "regex".
//
Regexp::Regexp(const HtConfiguration& config_arg)
    : Fuzzy(config_arg)
{
    name = "regex";
}
//*****************************************************************************
// Regexp::~Regexp()
//   Nothing to release here: the destructor body is empty.
//
Regexp::~Regexp() {}
//*****************************************************************************
// void Regexp::getWords(char *pattern, List &words)
//   Append to `words` the entries of the word database that match the
//   regular expression `pattern`.
//
//   pattern - the regular expression to search for.  It is anchored at the
//             start of each word by prepending "^" before compilation.
//   words   - output list; every match is added as a newly allocated String.
//             At most `regex_max_words` (default 25) entries are added.
//
// A very simplistic and inefficient regex search.  For every word
// that is looked for we do a complete linear search through the word
// database.
// Maybe a better method of doing this would be to mmap a list of words
// to memory and then run the regex on it.  It would still be a
// linear search, but with much less overhead.
//
void
Regexp::getWords(char *pattern, List &words)
{
    HtRegex regexMatch;
    String  stripped(pattern);

    // An earlier revision stripped punctuation from the pattern here:
    //   stripped.remove("^.[]$()|*+?{},-\\");
    // That step is disabled ("Why?? lha"), so the pattern is used as given.

    // Anchor the string to be matched
    regexMatch.set(String("^") + stripped);

    HtWordList wordDB(config);
    // NOTE(review): the outcome of Open() is not checked; the null test on
    // the list below is the only guard against an unusable database.
    wordDB.Open(config["word_db"], O_RDONLY);

    int wordCount = 0;
    int maximumWords = config.Value("regex_max_words", 25);

    // Words() may hand back a null list, so test it *before* iterating.
    // (Previously the list was dereferenced first and only checked for
    // null afterwards, when it was about to be destroyed.)
    List *wordList = wordDB.Words();
    if (wordList)
    {
        String *key;

        wordList->Start_Get();
        while (wordCount < maximumWords && (key = (String *) wordList->Get_Next()))
        {
            if (regexMatch.match(*key, 0, 0) != 0)
            {
                words.Add(new String(*key));
                wordCount++;
            }
        }

        // Destroy the list's entries, then the list object itself.
        wordList->Destroy();
        delete wordList;
    }

    wordDB.Close();
}
//*****************************************************************************
// int Regexp::openIndex()
//   Always returns 0 without doing any work.  getWords() reads the word
//   database directly, so presumably there is no separate index to open
//   (confirm against the Fuzzy interface).
//
int Regexp::openIndex()
{
    return 0;
}
//*****************************************************************************
// void Regexp::generateKey(char *, String &)
//   No-op: both arguments are ignored and nothing is written to the
//   String argument.
//
void Regexp::generateKey(char *, String &)
{
}
//*****************************************************************************
// void Regexp::addWord(char *)
//   No-op: the argument is ignored and no state is modified.
//
void Regexp::addWord(char *)
{
}
|