summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htfuzzy/Regexp.cc
blob: d3ef4b713422f17458c336e46f91f9ba3f8d4e0b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
//
// Regexp.cc
//
// Regexp: A fuzzy matching algorithm that matches an input regex against
//        the word database.  Based on the substring fuzzy algorithm.
//
// Part of the ht://Dig package   <http://www.htdig.org/>
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later 
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: Regexp.cc,v 1.5 2004/05/28 13:15:20 lha Exp $
//

#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */

#include <fcntl.h>

#include "Regexp.h"
#include "htString.h"
#include "List.h"
#include "StringMatch.h"
#include "HtConfiguration.h"

//*****************************************************************************
// Regexp::Regexp(const HtConfiguration& config_arg)
//   Forwards config_arg to the Fuzzy base-class constructor and then
//   sets `name` to "regex".
//
Regexp::Regexp(const HtConfiguration& config_arg)
    : Fuzzy(config_arg)
{
    name = "regex";
}


//*****************************************************************************
// Regexp::~Regexp()
//   Empty: the destructor body performs no cleanup of its own.
//
Regexp::~Regexp() {}


//*****************************************************************************
// void Regexp::getWords(char *pattern, List &words)
//   Collect words from the word database that match a regular expression.
//
//   pattern  the regular expression to look for.  A "^" is prepended
//            before it is handed to HtRegex, so the expression is
//            anchored at the start of each word.
//   words    receives a newly allocated String copy of every word that
//            matched.
//
//   Collection stops once the count of matches reaches the value of
//   "regex_max_words" (25 is passed to config.Value() as the second
//   argument -- presumably the fallback when the attribute is unset).
//
//   This is a very simplistic and inefficient regex search.  For every
//   pattern that is looked for we do a complete linear search through the
//   word database.  Maybe a better method of doing this would be to mmap a
//   list of words to memory and then run the regex on it.  It would still
//   be a linear search, but with much less overhead.
//
void
Regexp::getWords(char *pattern, List &words)
{
    HtRegex	regexMatch;
    String	stripped (pattern);

    // Punctuation used to be stripped from the pattern at this point:
    //    stripped.remove("^.[]$()|*+?{},-\\");
    // That call is left disabled ("Why??  lha"), so regex metacharacters
    // in the pattern reach HtRegex unchanged.

    // Anchor the string to be matched
    regexMatch.set(String("^") + stripped);

    HtWordList    wordDB(config);
    wordDB.Open(config["word_db"], O_RDONLY);
    List        *wordList = wordDB.Words();

    int         wordCount = 0;
    int         maximumWords = config.Value("regex_max_words", 25);

    // Check the list before touching it.  The original code only tested
    // wordList after it had already been dereferenced by the scan loop.
    if (wordList)
      {
        String	*key;

        wordList->Start_Get();
        while (wordCount < maximumWords && (key = (String *) wordList->Get_Next()))
          {
            if (regexMatch.match(*key, 0, 0) != 0)
              {
                words.Add(new String(*key));
                wordCount++;
              }
          }

        // The matches were copied into `words`, so the scanned list
        // itself is no longer needed.
        wordList->Destroy();
        delete wordList;
      }
    wordDB.Close();
}


//*****************************************************************************
// int Regexp::openIndex()
//   Performs no work and always returns 0.
//
int Regexp::openIndex()
{
    return 0;
}


//*****************************************************************************
// void Regexp::generateKey(char *, String &)
//   Intentionally empty: both arguments are ignored and nothing is
//   written to the String.
//
void Regexp::generateKey(char *, String &) {}


//*****************************************************************************
// void Regexp::addWord(char *)
//   Intentionally empty: the argument is ignored.
//
void Regexp::addWord(char *) {}