summaryrefslogtreecommitdiffstats
path: root/kspell2/plugins/ispell
diff options
context:
space:
mode:
authortoma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>2009-11-25 17:56:58 +0000
committertoma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>2009-11-25 17:56:58 +0000
commitce4a32fe52ef09d8f5ff1dd22c001110902b60a2 (patch)
tree5ac38a06f3dde268dc7927dc155896926aaf7012 /kspell2/plugins/ispell
downloadtdelibs-ce4a32fe52ef09d8f5ff1dd22c001110902b60a2.tar.gz
tdelibs-ce4a32fe52ef09d8f5ff1dd22c001110902b60a2.zip
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features.
BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'kspell2/plugins/ispell')
-rw-r--r--kspell2/plugins/ispell/Makefile.am24
-rw-r--r--kspell2/plugins/ispell/correct.cpp949
-rw-r--r--kspell2/plugins/ispell/good.cpp431
-rw-r--r--kspell2/plugins/ispell/hash.cpp204
-rw-r--r--kspell2/plugins/ispell/ispell.h801
-rw-r--r--kspell2/plugins/ispell/ispell_checker.cpp505
-rw-r--r--kspell2/plugins/ispell/ispell_checker.h273
-rw-r--r--kspell2/plugins/ispell/ispell_def.h34
-rw-r--r--kspell2/plugins/ispell/kspell_ispell.desktop22
-rw-r--r--kspell2/plugins/ispell/kspell_ispellclient.cpp54
-rw-r--r--kspell2/plugins/ispell/kspell_ispellclient.h56
-rw-r--r--kspell2/plugins/ispell/kspell_ispelldict.cpp76
-rw-r--r--kspell2/plugins/ispell/kspell_ispelldict.h49
-rw-r--r--kspell2/plugins/ispell/lookup.cpp764
-rw-r--r--kspell2/plugins/ispell/makedent.cpp972
-rw-r--r--kspell2/plugins/ispell/msgs.h329
-rw-r--r--kspell2/plugins/ispell/sp_spell.h60
-rw-r--r--kspell2/plugins/ispell/tgood.cpp810
18 files changed, 6413 insertions, 0 deletions
diff --git a/kspell2/plugins/ispell/Makefile.am b/kspell2/plugins/ispell/Makefile.am
new file mode 100644
index 000000000..b74f48753
--- /dev/null
+++ b/kspell2/plugins/ispell/Makefile.am
@@ -0,0 +1,24 @@
+METASOURCES = AUTO
+
+AM_CPPFLAGS = -I$(top_srcdir)/kspell2 -I$(top_srcdir) $(all_includes)
+
+# For the future: examine whether condensing the many *_LDFLAGS variables
+# into $(all_libraries) would be better
+AM_LDFLAGS = $(LDFLAGS_AS_NEEDED) $(LDFLAGS_NEW_DTAGS)
+
+kde_module_LTLIBRARIES = kspell_ispell.la
+
+kspell_ispell_la_SOURCES = kspell_ispellclient.cpp kspell_ispelldict.cpp \
+ correct.cpp \
+ good.cpp \
+ hash.cpp \
+ lookup.cpp \
+ makedent.cpp \
+ tgood.cpp \
+ ispell_checker.cpp
+
+kspell_ispell_la_LDFLAGS = -module -no-undefined $(KDE_PLUGIN)
+kspell_ispell_la_LIBADD = ../../ui/libkspell2.la
+
+service_DATA = kspell_ispell.desktop
+servicedir = $(kde_servicesdir)
diff --git a/kspell2/plugins/ispell/correct.cpp b/kspell2/plugins/ispell/correct.cpp
new file mode 100644
index 000000000..65e98fa6d
--- /dev/null
+++ b/kspell2/plugins/ispell/correct.cpp
@@ -0,0 +1,949 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+/*
+ * correct.c - Routines to manage the higher-level aspects of spell-checking
+ *
+ * This code originally resided in ispell.c, but was moved here to keep
+ * file sizes smaller.
+ *
+ * Copyright (c), 1983, by Pace Willisson
+ *
+ * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ * 4. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgment:
+ * This product includes software developed by Geoff Kuenning and
+ * other unpaid contributors.
+ * 5. The name of Geoff Kuenning may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Log$
+ * Revision 1.1 2004/01/31 16:44:12 zrusin
+ * ISpell plugin.
+ *
+ * Revision 1.4 2003/08/14 17:51:26 dom
+ * update license - exception clause should be Lesser GPL
+ *
+ * Revision 1.3 2003/07/28 20:40:25 dom
+ * fix up the license clause, further win32-registry proof some directory getting functions
+ *
+ * Revision 1.2 2003/07/16 22:52:35 dom
+ * LGPL + exception license
+ *
+ * Revision 1.1 2003/07/15 01:15:04 dom
+ * ispell enchant backend
+ *
+ * Revision 1.2 2003/01/29 05:50:11 hippietrail
+ *
+ * Fixed my mess in EncodingManager.
+ * Changed many C casts to C++ casts.
+ *
+ * Revision 1.1 2003/01/24 05:52:31 hippietrail
+ *
+ * Refactored ispell code. Old ispell global variables had been put into
+ * an allocated structure, a pointer to which was passed to many functions.
+ * I have now made all such functions and variables private members of the
+ * ISpellChecker class. It was C OO, now it's C++ OO.
+ *
+ * I've fixed the makefiles and tested compilation but am unable to test
+ * operation. Please back out my changes if they cause problems which
+ * are not obvious or easy to fix.
+ *
+ * Revision 1.7 2002/09/19 05:31:15 hippietrail
+ *
+ * More Ispell cleanup. Conditional globals and DEREF macros are removed.
+ * K&R function declarations removed, converted to Doxygen style comments
+ * where possible. No code has been changed (I hope). Compiles for me but
+ * unable to test.
+ *
+ * Revision 1.6 2002/09/17 03:03:28 hippietrail
+ *
+ * After seeking permission on the developer list I've reformatted all the
+ * spelling source which seemed to have parts which used 2, 3, 4, and 8
+ * spaces for tabs. It should all look good with our standard 4-space
+ * tabs now.
+ * I've concentrated just on indentation in the actual code. More prettying
+ * could be done.
+ * * NO code changes were made *
+ *
+ * Revision 1.5 2002/09/13 17:20:12 mpritchett
+ * Fix more warnings for Linux build
+ *
+ * Revision 1.4 2002/03/06 08:27:16 fjfranklin
+ * o Only activate compound handling when the hash file says so (Per Larsson)
+ *
+ * Revision 1.3 2001/05/14 09:52:50 hub
+ * Removed newMain.c from GNUmakefile.am
+ *
+ * C++ comments are not C comment. Changed to C comments
+ *
+ * Revision 1.2 2001/05/12 16:05:42 thomasf
+ * Big pseudo changes to ispell to make it pass around a structure rather
+ * than rely on all sorts of gloabals willy nilly here and there. Also
+ * fixed our spelling class to work with accepting suggestions once more.
+ * This code is dirty, gross and ugly (not to mention still not supporting
+ * multiple hash sized just yet) but it works on my machine and will no
+ * doubt break other machines.
+ *
+ * Revision 1.1 2001/04/15 16:01:24 tomas_f
+ * moving to spell/xp
+ *
+ * Revision 1.2 1999/10/05 16:17:28 paul
+ * Fixed build, and other tidyness.
+ * Spell dialog enabled by default, with keyboard binding of F7.
+ *
+ * Revision 1.1 1999/09/29 23:33:32 justin
+ * Updates to the underlying ispell-based code to support suggested corrections.
+ *
+ * Revision 1.59 1995/08/05 23:19:43 geoff
+ * Fix a bug that caused offsets for long lines to be confused if the
+ * line started with a quoting uparrow.
+ *
+ * Revision 1.58 1994/11/02 06:56:00 geoff
+ * Remove the anyword feature, which I've decided is a bad idea.
+ *
+ * Revision 1.57 1994/10/26 05:12:39 geoff
+ * Try boundary characters when inserting or substituting letters, except
+ * (naturally) at word boundaries.
+ *
+ * Revision 1.56 1994/10/25 05:46:30 geoff
+ * Fix an assignment inside a conditional that could generate spurious
+ * warnings (as well as being bad style). Add support for the FF_ANYWORD
+ * option.
+ *
+ * Revision 1.55 1994/09/16 04:48:24 geoff
+ * Don't pass newlines from the input to various other routines, and
+ * don't assume that those routines leave the input unchanged.
+ *
+ * Revision 1.54 1994/09/01 06:06:41 geoff
+ * Change erasechar/killchar to uerasechar/ukillchar to avoid
+ * shared-library problems on HP systems.
+ *
+ * Revision 1.53 1994/08/31 05:58:38 geoff
+ * Add code to handle extremely long lines in -a mode without splitting
+ * words or reporting incorrect offsets.
+ *
+ * Revision 1.52 1994/05/25 04:29:24 geoff
+ * Fix a bug that caused line widths to be calculated incorrectly when
+ * displaying lines containing tabs. Fix a couple of places where
+ * characters were sign-extended incorrectly, which could cause 8-bit
+ * characters to be displayed wrong.
+ *
+ * Revision 1.51 1994/05/17 06:44:05 geoff
+ * Add support for controlled compound formation and the COMPOUNDONLY
+ * option to affix flags.
+ *
+ * Revision 1.50 1994/04/27 05:20:14 geoff
+ * Allow compound words to be formed from more than two components
+ *
+ * Revision 1.49 1994/04/27 01:50:31 geoff
+ * Add support to correctly capitalize words generated as a result of a
+ * missing-space suggestion.
+ *
+ * Revision 1.48 1994/04/03 23:23:02 geoff
+ * Clean up the code in missingspace() to be a bit simpler and more
+ * efficient.
+ *
+ * Revision 1.47 1994/03/15 06:24:23 geoff
+ * Fix the +/-/~ commands to be independent. Allow the + command to
+ * receive a suffix which is a deformatter type (currently hardwired to
+ * be either tex or nroff/troff).
+ *
+ * Revision 1.46 1994/02/21 00:20:03 geoff
+ * Fix some bugs that could cause bad displays in the interaction between
+ * TeX parsing and string characters. Show_char now will not overrun
+ * the inverse-video display area by accident.
+ *
+ * Revision 1.45 1994/02/14 00:34:51 geoff
+ * Fix correct to accept length parameters for ctok and itok, so that it
+ * can pass them to the to/from ichar routines.
+ *
+ * Revision 1.44 1994/01/25 07:11:22 geoff
+ * Get rid of all old RCS log lines in preparation for the 3.1 release.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "ispell_checker.h"
+#include "msgs.h"
+
+/*
+extern void upcase P ((ichar_t * string));
+extern void lowcase P ((ichar_t * string));
+extern ichar_t * strtosichar P ((char * in, int canonical));
+
+int compoundflag = COMPOUND_CONTROLLED;
+*/
+
+/*
+ * Compare two strings by the dictionary's sort order, giving differences
+ * in case less weight than differences in letters.
+ *
+ * Both strings are converted to ichar_t form with strtoichar().  The first
+ * pass skips positions where the characters differ only in case (as judged
+ * by mylower/myupper/mytoupper/mytolower) and returns at the first position
+ * that differs in more than case, or where one string ends.  If the strings
+ * match apart from case, a second pass returns the difference at the first
+ * position that differs at all.
+ *
+ * \param a First string to compare
+ * \param b Second string to compare
+ * \param canonical NZ for canonical string chars
+ *
+ * \return A value derived from the m_hashheader.sortorder entries of the
+ * deciding characters: sortorder[a] - sortorder[b] at the deciding
+ * position, sortorder[a] alone when b ends first, -sortorder[b] when
+ * a ends first, and 0 when the converted strings are identical.
+ */
+int
+ISpellChecker::casecmp (char *a, char *b, int canonical)
+{
+ register ichar_t * ap;
+ register ichar_t * bp;
+ ichar_t inta[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
+ ichar_t intb[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
+
+ strtoichar (inta, a, sizeof inta, canonical);
+ strtoichar (intb, b, sizeof intb, canonical);
+ for (ap = inta, bp = intb; *ap != 0; ap++, bp++) /* pass 1: ignore pure case differences */
+ {
+ if (*ap != *bp)
+ {
+ if (*bp == '\0') /* b ended while a still has characters */
+ return m_hashheader.sortorder[*ap];
+ else if (mylower (*ap))
+ {
+ if (mylower (*bp) || mytoupper (*ap) != *bp) /* not merely the other case of *ap */
+ return static_cast<int>(m_hashheader.sortorder[*ap])
+ - static_cast<int>(m_hashheader.sortorder[*bp]);
+ }
+ else
+ {
+ if (myupper (*bp) || mytolower (*ap) != *bp) /* not merely the other case of *ap */
+ return static_cast<int>(m_hashheader.sortorder[*ap])
+ - static_cast<int>(m_hashheader.sortorder[*bp]);
+ }
+ }
+ }
+ if (*bp != '\0') /* a ended while b still has characters */
+ return -static_cast<int>(m_hashheader.sortorder[*bp]);
+ for (ap = inta, bp = intb; *ap; ap++, bp++) /* pass 2: exact comparison breaks the tie */
+ {
+ if (*ap != *bp)
+ {
+ return static_cast<int>(m_hashheader.sortorder[*ap])
+ - static_cast<int>(m_hashheader.sortorder[*bp]);
+ }
+ }
+ return 0;
+}
+
+/*
+ * Rebuild the list of suggested corrections for a word.
+ *
+ * The possibilities table and its counters are cleared, then each
+ * suggestion generator is run in turn for as long as fewer than
+ * MAXPOSSIBLE entries have been collected.
+ *
+ * \param word Word to generate suggestions for
+ */
+void
+ISpellChecker::makepossibilities (ichar_t *word)
+{
+    int slot;
+
+    m_pcount = 0;
+    m_maxposslen = 0;
+    m_easypossibilities = 0;
+    for (slot = MAXPOSSIBLE; --slot >= 0; )
+        m_possibilities[slot][0] = 0;
+
+#ifndef NO_CAPITALIZATION_SUPPORT
+    wrongcapital (word);
+#endif
+
+    /*
+     * The generator order follows Pollock and Zamora, CACM April 1984
+     * (V. 27, No. 4), page 363:
+     *     OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
+     * (the old version had it exactly backwards. -- PWP)
+     */
+    if (m_pcount < MAXPOSSIBLE)
+    {
+        missingletter (word);       /* omission */
+    }
+    if (m_pcount < MAXPOSSIBLE)
+    {
+        transposedletter (word);    /* transposition */
+    }
+    if (m_pcount < MAXPOSSIBLE)
+    {
+        extraletter (word);         /* insertion */
+    }
+    if (m_pcount < MAXPOSSIBLE)
+    {
+        wrongletter (word);         /* substitution */
+    }
+
+    if (m_pcount < MAXPOSSIBLE
+        && m_hashheader.compoundflag != COMPOUND_ANYTIME)
+    {
+        missingspace (word);        /* two words */
+    }
+}
+
+/*
+ * Add one candidate correction to the possibilities table.
+ *
+ * The word is converted with ichartosstr() and appended to
+ * m_possibilities unless an identical string is already present.
+ * m_maxposslen is raised when the new entry is longer than any
+ * previous one.
+ *
+ * \param word Candidate correction to record
+ *
+ * \return 0 if the table still has room after the call (including the
+ * case where the word was a duplicate and nothing was added),
+ * -1 if the table is full.
+ */
+int
+ISpellChecker::insert (ichar_t *word)
+{
+    char * realword = ichartosstr (word, 0);
+
+    for (int i = 0; i < m_pcount; i++)
+    {
+        if (strcmp (m_possibilities[i], realword) == 0)
+            return (0);
+    }
+
+    /*
+     * Never write past the last slot.  Callers are expected to stop
+     * once -1 has been returned, but a stray extra call must not
+     * corrupt memory.
+     */
+    if (m_pcount >= MAXPOSSIBLE)
+        return (-1);
+
+    strcpy (m_possibilities[m_pcount++], realword);
+
+    const int len = static_cast<int>(strlen (realword));
+    if (len > m_maxposslen)
+        m_maxposslen = len;
+
+    return (m_pcount >= MAXPOSSIBLE) ? -1 : 0;
+}
+
+#ifndef NO_CAPITALIZATION_SUPPORT
+/*
+ * Suggest the word itself when only its capitalization is wrong.
+ *
+ * \param word Word being corrected
+ */
+void
+ISpellChecker::wrongcapital (ichar_t *word)
+{
+    ichar_t upper[INPUTWORDLEN + MAXAFFIXLEN];
+
+    /*
+    ** A nonzero third argument makes "good" ignore case.  Unless the
+    ** word matches that way there is nothing to suggest here.
+    */
+    if (!good (word, 0, 1, 0, 0))
+        return;
+
+    /* "ins_cap" recapitalizes the upper-cased copy correctly. */
+    icharcpy (upper, word);
+    upcase (upper);
+    ins_cap (upper, word);
+}
+#endif
+
+/*
+ * Suggest corrections formed by replacing a single letter.
+ *
+ * Each position of the word is replaced in turn by every character of
+ * m_Try.  Boundary characters are not tried in the first or last
+ * position.  The search ends early when ins_cap() reports a negative
+ * result.
+ *
+ * \param word Word being corrected
+ */
+void
+ISpellChecker::wrongletter (ichar_t *word)
+{
+    ichar_t candidate[INPUTWORDLEN + MAXAFFIXLEN];
+    const int len = icharlen (word);
+
+    icharcpy (candidate, word);
+#ifndef NO_CAPITALIZATION_SUPPORT
+    upcase (candidate);
+#endif
+
+    for (int pos = 0; pos < len; ++pos)
+    {
+        const ichar_t original = candidate[pos];
+        const bool atEdge = (pos == 0 || pos == len - 1);
+
+        for (int t = 0; t < m_Trynum; ++t)
+        {
+            /* Skip the letter already there and edge boundary chars. */
+            if (m_Try[t] == original
+                || (isboundarych (m_Try[t]) && atEdge))
+                continue;
+
+            candidate[pos] = m_Try[t];
+            if (good (candidate, 0, 1, 0, 0)
+                && ins_cap (candidate, word) < 0)
+                return;
+        }
+        candidate[pos] = original;
+    }
+}
+
+/*
+ * Suggest corrections formed by deleting one letter from the word.
+ *
+ * Words shorter than two characters are left alone.  The working copy
+ * starts out as the word without its first character.  Each pass tests
+ * that copy and then copies the next character of the original over it,
+ * which restores the previously deleted character and deletes the one
+ * after it.  Candidates accepted by good() are handed to ins_cap(); the
+ * search ends early when ins_cap() reports a negative result.
+ *
+ * \param word Word being corrected
+ */
+void
+ISpellChecker::extraletter (ichar_t *word)
+{
+ ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN];
+ register ichar_t * p;
+ register ichar_t * r;
+
+ if (icharlen (word) < 2)
+ return;
+
+ icharcpy (newword, word + 1); /* first candidate: word minus its first character */
+ for (p = word, r = newword; *p != 0; )
+ {
+ if (good (newword, 0, 1, 0, 0))
+ {
+ if (ins_cap (newword, word) < 0)
+ return;
+ }
+ *r++ = *p++; /* put this character back, dropping the next one */
+ }
+}
+
+/*
+ * Suggest corrections formed by inserting one letter into the word.
+ *
+ * The word is copied one slot to the right, leaving a free slot at the
+ * front of the working buffer.  Every character of m_Try is tried in the
+ * free slot, then the slot is moved one position to the right by copying
+ * the next original character into it.  A final loop tries each character
+ * after the last letter.  Characters for which isboundarych() is true are
+ * not tried at the very beginning or the very end.  Candidates accepted
+ * by good() are handed to ins_cap(); the search ends early when ins_cap()
+ * reports a negative result.
+ *
+ * \param word Word being corrected
+ */
+void
+ISpellChecker::missingletter (ichar_t *word)
+{
+ ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN + 1];
+ register ichar_t * p;
+ register ichar_t * r;
+ register int i;
+
+ icharcpy (newword + 1, word); /* newword[0] is the free slot */
+ for (p = word, r = newword; *p != 0; )
+ {
+ for (i = 0; i < m_Trynum; i++)
+ {
+ if (isboundarych (m_Try[i]) && r == newword) /* no boundary char at the start */
+ continue;
+ *r = m_Try[i];
+ if (good (newword, 0, 1, 0, 0))
+ {
+ if (ins_cap (newword, word) < 0)
+ return;
+ }
+ }
+ *r++ = *p++; /* move the free slot one position right */
+ }
+ for (i = 0; i < m_Trynum; i++) /* finally, try appending after the last letter */
+ {
+ if (isboundarych (m_Try[i])) /* no boundary char at the end */
+ continue;
+ *r = m_Try[i];
+ if (good (newword, 0, 1, 0, 0))
+ {
+ if (ins_cap (newword, word) < 0)
+ return;
+ }
+ }
+}
+
+/*
+ * Suggest corrections formed by splitting the word into two words.
+ *
+ * Every split point that leaves at least one character on each side is
+ * tried.  When both halves are accepted by good(), each saved
+ * capitalization of the first half is combined with each saved
+ * capitalization of the second half, and the pair is inserted twice:
+ * once joined by a space and once joined by a hyphen.  The search ends
+ * early when insert() reports a negative result.
+ *
+ * \param word Word being corrected
+ */
+void ISpellChecker::missingspace (ichar_t *word)
+{
+ ichar_t firsthalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
+ int firstno; /* Index into first */
+ ichar_t * firstp; /* Ptr into current firsthalf word */
+ ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN + 1];
+ int nfirsthalf; /* No. words saved in 1st half */
+ int nsecondhalf; /* No. words saved in 2nd half */
+ register ichar_t * p;
+ ichar_t secondhalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
+ int secondno; /* Index into second */
+
+ /*
+ ** We don't do words of length less than 3; this keeps us from
+ ** splitting all two-letter words into two single letters. We
+ ** also don't do maximum-length words, since adding the space
+ ** would exceed the size of the "possibilities" array.
+ */
+ nfirsthalf = icharlen (word); /* temporarily holds the word length */
+ if (nfirsthalf < 3 || nfirsthalf >= INPUTWORDLEN + MAXAFFIXLEN - 1)
+ return;
+ icharcpy (newword + 1, word); /* copy shifted right by one slot */
+ for (p = newword + 1; p[1] != '\0'; p++)
+ {
+ p[-1] = *p; /* extend the first half by one character */
+ *p = '\0'; /* terminate it; the second half starts at p + 1 */
+ if (good (newword, 0, 1, 0, 0))
+ {
+ /*
+ * Save_cap must be called before good() is called on the
+ * second half, because it uses state left around by
+ * good(). This is unfortunate because it wastes a bit of
+ * time, but I don't think it's a significant performance
+ * problem.
+ */
+ nfirsthalf = save_cap (newword, word, firsthalf);
+ if (good (p + 1, 0, 1, 0, 0))
+ {
+ nsecondhalf = save_cap (p + 1, p + 1, secondhalf);
+ for (firstno = 0; firstno < nfirsthalf; firstno++)
+ {
+ firstp = &firsthalf[firstno][p - newword]; /* just past the first half */
+ for (secondno = 0; secondno < nsecondhalf; secondno++)
+ {
+ *firstp = ' ';
+ icharcpy (firstp + 1, secondhalf[secondno]);
+ if (insert (firsthalf[firstno]) < 0)
+ return;
+ *firstp = '-'; /* same pair again, joined by a hyphen */
+ if (insert (firsthalf[firstno]) < 0)
+ return;
+ }
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Test whether a word can be accepted as a compound of dictionary words.
+ *
+ * The word is split at every position that leaves at least
+ * m_hashheader.compoundmin characters on each side.  At the first split
+ * whose first part is accepted by good() and whose remainder is accepted
+ * either by good() or by a recursive call to this function, the result is
+ * decided by comparing the capitalization (whatcap) of the two parts.
+ *
+ * \param word Word to test
+ * \param pfxopts Options to apply to prefixes
+ *
+ * \return Nonzero if the word is an acceptable compound; 0 if it is not,
+ * if compounds are disabled (COMPOUND_NEVER), or if the word is
+ * shorter than 2 * compoundmin.
+ */
+int
+ISpellChecker::compoundgood (ichar_t *word, int pfxopts)
+{
+ ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN];
+ register ichar_t * p;
+ register ichar_t savech;
+ long secondcap; /* Capitalization of 2nd half */
+
+ /*
+ ** If compoundflag is COMPOUND_NEVER, compound words are never ok.
+ */
+ if (m_hashheader.compoundflag == COMPOUND_NEVER)
+ return 0;
+ /*
+ ** Test for a possible compound word (for languages like German that
+ ** form lots of compounds).
+ **
+ ** This is similar to missingspace, except we quit on the first hit,
+ ** and we won't allow either member of the compound to be a single
+ ** letter.
+ **
+ ** We don't do words of length less than 2 * compoundmin, since
+ ** both halves must have at least compoundmin letters.
+ */
+ if (icharlen (word) < 2 * m_hashheader.compoundmin)
+ return 0;
+ icharcpy (newword, word);
+ p = newword + m_hashheader.compoundmin; /* earliest allowed split point */
+ for ( ; p[m_hashheader.compoundmin - 1] != 0; p++) /* stop when the remainder gets too short */
+ {
+ savech = *p;
+ *p = 0; /* temporarily cut the word at p */
+ if (good (newword, 0, 0, pfxopts, FF_COMPOUNDONLY))
+ {
+ *p = savech; /* restore so p addresses the remainder */
+ if (good (p, 0, 1, FF_COMPOUNDONLY, 0)
+ || compoundgood (p, FF_COMPOUNDONLY))
+ {
+ secondcap = whatcap (p);
+ switch (whatcap (newword))
+ {
+ case ANYCASE:
+ case CAPITALIZED:
+ case FOLLOWCASE: /* Followcase can have l.c. suffix */
+ return secondcap == ANYCASE;
+ case ALLCAPS:
+ return secondcap == ALLCAPS;
+ }
+ }
+ }
+ else
+ *p = savech;
+ }
+ return 0;
+}
+
+/*
+ * Suggest corrections formed by swapping two adjacent letters.
+ *
+ * Each adjacent pair is exchanged in a working copy, tested with good(),
+ * and exchanged back.  The search ends early when ins_cap() reports a
+ * negative result.
+ *
+ * \param word Word being corrected
+ */
+void
+ISpellChecker::transposedletter (ichar_t *word)
+{
+    ichar_t swapped[INPUTWORDLEN + MAXAFFIXLEN];
+    ichar_t held;
+
+    icharcpy (swapped, word);
+    for (int i = 0; swapped[i + 1] != 0; ++i)
+    {
+        /* Exchange the pair at i and i + 1. */
+        held = swapped[i];
+        swapped[i] = swapped[i + 1];
+        swapped[i + 1] = held;
+
+        if (good (swapped, 0, 1, 0, 0) && ins_cap (swapped, word) < 0)
+            return;
+
+        /* Exchange it back before moving on. */
+        held = swapped[i];
+        swapped[i] = swapped[i + 1];
+        swapped[i + 1] = held;
+    }
+}
+
+/*!
+ * Insert one or more correctly capitalized versions of word
+ *
+ * The versions are produced by save_cap() and passed to insert() one at
+ * a time.
+ *
+ * \param word Word to insert
+ * \param pattern Prototype capitalization pattern
+ *
+ * \return -1 as soon as insert() reports a negative result, otherwise 0
+ */
+int
+ISpellChecker::ins_cap (ichar_t *word, ichar_t *pattern)
+{
+    ichar_t variants[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
+    const int count = save_cap (word, pattern, variants);
+    int idx = 0;
+
+    while (idx < count)
+    {
+        if (insert (variants[idx++]) < 0)
+            return -1;
+    }
+    return 0;
+}
+
+/*!
+ * Save one or more correctly capitalized versions of word
+ *
+ * The entries of m_hits are visited from the last one to the first, and
+ * save_root_cap() is called for each until MAX_CAPS versions are stored.
+ *
+ * \param word Word to save
+ * \param pattern Prototype capitalization pattern
+ * \param savearea Room to save words
+ *
+ * \return Number of versions stored in savearea (0 for an empty word)
+ */
+int
+ISpellChecker::save_cap (ichar_t *word, ichar_t *pattern,
+    ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN])
+{
+    int nsaved = 0;     /* Number of words saved */
+
+    if (*word == 0)
+        return 0;
+
+    int hitno = m_numhits;  /* Index into hits array */
+    while (--hitno >= 0 && nsaved < MAX_CAPS)
+    {
+        /* Affix lengths default to zero when the hit has no such affix. */
+        int prestrip = 0;   /* No. chars stripped from front */
+        int preadd = 0;     /* No. chars added to front of root */
+        int sufstrip = 0;   /* No. chars stripped from back */
+        int sufadd = 0;     /* No. chars added to back of root */
+
+        if (m_hits[hitno].prefix)
+        {
+            prestrip = m_hits[hitno].prefix->stripl;
+            preadd = m_hits[hitno].prefix->affl;
+        }
+        if (m_hits[hitno].suffix)
+        {
+            sufstrip = m_hits[hitno].suffix->stripl;
+            sufadd = m_hits[hitno].suffix->affl;
+        }
+
+        save_root_cap (word, pattern, prestrip, preadd,
+            sufstrip, sufadd,
+            m_hits[hitno].dictent, m_hits[hitno].prefix,
+            m_hits[hitno].suffix,
+            savearea, &nsaved);
+    }
+    return nsaved;
+}
+
+/*
+ * Insert the correctly capitalized versions of a word for one root entry.
+ *
+ * The versions are produced by save_root_cap() and passed to insert()
+ * one at a time.
+ *
+ * \param word Word to be inserted
+ * \param pattern Capitalization pattern
+ * \param prestrip No. chars stripped from front
+ * \param preadd No. chars added to front of root
+ * \param sufstrip No. chars stripped from back
+ * \param sufadd No. chars added to back of root
+ * \param firstdent First dent for root
+ * \param pfxent Pfx-flag entry for word
+ * \param sufent Sfx-flag entry for word
+ *
+ * \return -1 as soon as insert() reports a negative result, otherwise 0
+ */
+int
+ISpellChecker::ins_root_cap (ichar_t *word, ichar_t *pattern,
+    int prestrip, int preadd, int sufstrip, int sufadd,
+    struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent)
+{
+    ichar_t variants[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
+    int count = 0;      /* Number of words saved */
+
+    save_root_cap (word, pattern, prestrip, preadd, sufstrip, sufadd,
+        firstdent, pfxent, sufent, variants, &count);
+
+    int idx = 0;        /* Index into variants */
+    while (idx < count)
+    {
+        if (insert (variants[idx++]) < 0)
+            return -1;
+    }
+    return 0;
+}
+
+/* ARGSUSED */
+/*!
+ * Append correctly capitalized versions of a word to savearea.
+ *
+ * When NO_CAPITALIZATION_SUPPORT is defined, a single version is saved
+ * whose case follows the first two characters of the pattern.  Otherwise
+ * the capitalization recorded in the dictionary entry, and in the variant
+ * entries chained through dent->next while MOREVARIANTS is set, decides
+ * which versions are saved.  Nothing is saved once *nsaved has reached
+ * MAX_CAPS.
+ *
+ * \param word Word to be saved
+ * \param pattern Capitalization pattern
+ * \param prestrip No. chars stripped from front
+ * \param preadd No. chars added to front of root
+ * \param sufstrip No. chars stripped from back
+ * \param sufadd No. chars added to back of root
+ * \param firstdent First dent for root
+ * \param pfxent Pfx-flag entry for word
+ * \param sufent Sfx-flag entry for word
+ * \param savearea Room to save words
+ * \param nsaved Number saved so far (updated)
+ */
+void
+ISpellChecker::save_root_cap (ichar_t *word, ichar_t *pattern,
+ int prestrip, int preadd, int sufstrip, int sufadd,
+ struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent,
+ ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN],
+ int * nsaved)
+{
+#ifndef NO_CAPITALIZATION_SUPPORT
+ register struct dent * dent;
+#endif /* NO_CAPITALIZATION_SUPPORT */
+ int firstisupper;
+ ichar_t newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
+#ifndef NO_CAPITALIZATION_SUPPORT
+ register ichar_t * p;
+ int len;
+ int i;
+ int limit;
+#endif /* NO_CAPITALIZATION_SUPPORT */
+
+ if (*nsaved >= MAX_CAPS) /* savearea is already full */
+ return;
+ icharcpy (newword, word);
+ firstisupper = myupper (pattern[0]);
+#ifdef NO_CAPITALIZATION_SUPPORT
+ /*
+ ** Apply the old, simple-minded capitalization rules.
+ */
+ if (firstisupper)
+ {
+ if (myupper (pattern[1]))
+ upcase (newword);
+ else
+ {
+ lowcase (newword);
+ newword[0] = mytoupper (newword[0]);
+ }
+ }
+ else
+ lowcase (newword);
+ icharcpy (savearea[*nsaved], newword);
+ (*nsaved)++;
+ return;
+#else /* NO_CAPITALIZATION_SUPPORT */
+#define flagsareok(dent) \
+ ((pfxent == NULL \
+ || TSTMASKBIT (dent->mask, pfxent->flagbit)) \
+ && (sufent == NULL \
+ || TSTMASKBIT (dent->mask, sufent->flagbit)))
+
+ dent = firstdent;
+ if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS)
+ {
+ upcase (newword); /* Uppercase required */
+ icharcpy (savearea[*nsaved], newword);
+ (*nsaved)++;
+ return;
+ }
+ for (p = pattern; *p; p++) /* look for any lowercase character in the pattern */
+ {
+ if (mylower (*p))
+ break;
+ }
+ if (*p == 0)
+ {
+ upcase (newword); /* Pattern was all caps */
+ icharcpy (savearea[*nsaved], newword);
+ (*nsaved)++;
+ return;
+ }
+ for (p = pattern + 1; *p; p++) /* look for an uppercase character after the first */
+ {
+ if (myupper (*p))
+ break;
+ }
+ if (*p == 0)
+ {
+ /*
+ ** The pattern was all-lower or capitalized. If that's
+ ** legal, insert only that version.
+ */
+ if (firstisupper)
+ {
+ if (captype (dent->flagfield) == CAPITALIZED
+ || captype (dent->flagfield) == ANYCASE)
+ {
+ lowcase (newword);
+ newword[0] = mytoupper (newword[0]);
+ icharcpy (savearea[*nsaved], newword);
+ (*nsaved)++;
+ return;
+ }
+ }
+ else
+ {
+ if (captype (dent->flagfield) == ANYCASE)
+ {
+ lowcase (newword);
+ icharcpy (savearea[*nsaved], newword);
+ (*nsaved)++;
+ return;
+ }
+ }
+ while (dent->flagfield & MOREVARIANTS) /* first entry did not allow it; check the variants */
+ {
+ dent = dent->next;
+ if (captype (dent->flagfield) == FOLLOWCASE
+ || !flagsareok (dent))
+ continue;
+ if (firstisupper)
+ {
+ if (captype (dent->flagfield) == CAPITALIZED)
+ {
+ lowcase (newword);
+ newword[0] = mytoupper (newword[0]);
+ icharcpy (savearea[*nsaved], newword);
+ (*nsaved)++;
+ return;
+ }
+ }
+ else
+ {
+ if (captype (dent->flagfield) == ANYCASE)
+ {
+ lowcase (newword);
+ icharcpy (savearea[*nsaved], newword);
+ (*nsaved)++;
+ return;
+ }
+ }
+ }
+ }
+ /*
+ ** Either the sample had complex capitalization, or the simple
+ ** capitalizations (all-lower or capitalized) are illegal.
+ ** Insert all legal capitalizations, including those that are
+ ** all-lower or capitalized. If the prototype is capitalized,
+ ** capitalize all-lower samples. Watch out for affixes.
+ */
+ dent = firstdent;
+ p = strtosichar (dent->word, 1);
+ len = icharlen (p); /* length of the root as stored in the dictionary */
+ if (dent->flagfield & MOREVARIANTS)
+ dent = dent->next; /* Skip place-holder entry */
+ for ( ; ; )
+ {
+ if (flagsareok (dent))
+ {
+ if (captype (dent->flagfield) != FOLLOWCASE)
+ {
+ lowcase (newword);
+ if (firstisupper || captype (dent->flagfield) == CAPITALIZED)
+ newword[0] = mytoupper (newword[0]);
+ icharcpy (savearea[*nsaved], newword);
+ (*nsaved)++;
+ if (*nsaved >= MAX_CAPS)
+ return;
+ }
+ else
+ {
+ /* Followcase is the tough one. */
+ p = strtosichar (dent->word, 1);
+ memmove (
+ reinterpret_cast<char *>(newword + preadd),
+ reinterpret_cast<char *>(p + prestrip),
+ (len - prestrip - sufstrip) * sizeof (ichar_t)); /* copy the root's own case over the middle of newword */
+ if (myupper (p[prestrip])) /* prefix takes the case of the first kept root character */
+ {
+ for (i = 0; i < preadd; i++)
+ newword[i] = mytoupper (newword[i]);
+ }
+ else
+ {
+ for (i = 0; i < preadd; i++)
+ newword[i] = mytolower (newword[i]);
+ }
+ limit = len + preadd + sufadd - prestrip - sufstrip; /* total length of the affixed word */
+ i = len + preadd - prestrip - sufstrip; /* index where the suffix starts */
+ p += len - sufstrip - 1; /* last kept root character */
+ if (myupper (*p)) /* suffix takes the case of that character */
+ {
+ for (p = newword + i; i < limit; i++, p++)
+ *p = mytoupper (*p);
+ }
+ else
+ {
+ for (p = newword + i; i < limit; i++, p++)
+ *p = mytolower (*p);
+ }
+ icharcpy (savearea[*nsaved], newword);
+ (*nsaved)++;
+ if (*nsaved >= MAX_CAPS)
+ return;
+ }
+ }
+ if ((dent->flagfield & MOREVARIANTS) == 0)
+ break; /* End of the line */
+ dent = dent->next;
+ }
+ return;
+#endif /* NO_CAPITALIZATION_SUPPORT */
+}
+
+
diff --git a/kspell2/plugins/ispell/good.cpp b/kspell2/plugins/ispell/good.cpp
new file mode 100644
index 000000000..351106d99
--- /dev/null
+++ b/kspell2/plugins/ispell/good.cpp
@@ -0,0 +1,431 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+/*
+ * good.c - see if a word or its root word
+ * is in the dictionary.
+ *
+ * Pace Willisson, 1983
+ *
+ * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ * 4. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgment:
+ * This product includes software developed by Geoff Kuenning and
+ * other unpaid contributors.
+ * 5. The name of Geoff Kuenning may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Log$
+ * Revision 1.1 2004/01/31 16:44:12 zrusin
+ * ISpell plugin.
+ *
+ * Revision 1.4 2003/08/14 17:51:26 dom
+ * update license - exception clause should be Lesser GPL
+ *
+ * Revision 1.3 2003/07/28 20:40:25 dom
+ * fix up the license clause, further win32-registry proof some directory getting functions
+ *
+ * Revision 1.2 2003/07/16 22:52:37 dom
+ * LGPL + exception license
+ *
+ * Revision 1.1 2003/07/15 01:15:04 dom
+ * ispell enchant backend
+ *
+ * Revision 1.2 2003/01/29 05:50:11 hippietrail
+ *
+ * Fixed my mess in EncodingManager.
+ * Changed many C casts to C++ casts.
+ *
+ * Revision 1.1 2003/01/24 05:52:32 hippietrail
+ *
+ * Refactored ispell code. Old ispell global variables had been put into
+ * an allocated structure, a pointer to which was passed to many functions.
+ * I have now made all such functions and variables private members of the
+ * ISpellChecker class. It was C OO, now it's C++ OO.
+ *
+ * I've fixed the makefiles and tested compilation but am unable to test
+ * operation. Please back out my changes if they cause problems which
+ * are not obvious or easy to fix.
+ *
+ * Revision 1.6 2003/01/06 18:48:38 dom
+ * ispell cleanup, start of using new 'add' save features
+ *
+ * Revision 1.5 2002/09/19 05:31:15 hippietrail
+ *
+ * More Ispell cleanup. Conditional globals and DEREF macros are removed.
+ * K&R function declarations removed, converted to Doxygen style comments
+ * where possible. No code has been changed (I hope). Compiles for me but
+ * unable to test.
+ *
+ * Revision 1.4 2002/09/17 03:03:29 hippietrail
+ *
+ * After seeking permission on the developer list I've reformatted all the
+ * spelling source which seemed to have parts which used 2, 3, 4, and 8
+ * spaces for tabs. It should all look good with our standard 4-space
+ * tabs now.
+ * I've concentrated just on indentation in the actual code. More prettying
+ * could be done.
+ * * NO code changes were made *
+ *
+ * Revision 1.3 2002/09/13 17:20:12 mpritchett
+ * Fix more warnings for Linux build
+ *
+ * Revision 1.2 2001/05/12 16:05:42 thomasf
+ * Big pseudo changes to ispell to make it pass around a structure rather
+ * than rely on all sorts of globals willy nilly here and there. Also
+ * fixed our spelling class to work with accepting suggestions once more.
+ * This code is dirty, gross and ugly (not to mention still not supporting
+ * multiple hash sized just yet) but it works on my machine and will no
+ * doubt break other machines.
+ *
+ * Revision 1.1 2001/04/15 16:01:24 tomas_f
+ * moving to spell/xp
+ *
+ * Revision 1.5 2000/02/09 22:35:25 sterwill
+ * Clean up some warnings
+ *
+ * Revision 1.4 1998/12/29 14:55:32 eric
+ *
+ * I've doctored the ispell code pretty extensively here. It is now
+ * warning-free on Win32. It also *works* on Win32 now, since I
+ * replaced all the I/O calls with ANSI standard ones.
+ *
+ * Revision 1.3 1998/12/28 23:11:30 eric
+ *
+ * modified spell code and integration to build on Windows.
+ * This is still a hack.
+ *
+ * Actually, it doesn't yet WORK on Windows. It just builds.
+ * SpellCheckInit is failing for some reason.
+ *
+ * Revision 1.2 1998/12/28 22:16:22 eric
+ *
+ * These changes begin to incorporate the spell checker into AbiWord. Most
+ * of this is a hack.
+ *
+ * 1. added other/spell to the -I list in config/abi_defs
+ * 2. replaced other/spell/Makefile with one which is more like
+ * our build system.
+ * 3. added other/spell to other/Makefile so that the build will now
+ * dive down and build the spell check library.
+ * 4. added the AbiSpell library to the Makefiles in wp/main
+ * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
+ * This call is a HACK and should be replaced with something
+ * proper later.
+ * 6. added code to fv_View.cpp as follows:
+ * whenever you double-click on a word, the spell checker
+ * verifies that word and prints its status to stdout.
+ *
+ * Caveats:
+ * 1. This will break the Windows build. I'm going to work on fixing it
+ * now.
+ * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash.
+ * The dictionary location is currently hard-coded. This will be
+ * fixed as well.
+ *
+ * Anyway, such as it is, it works.
+ *
+ * Revision 1.1 1998/12/28 18:04:43 davet
+ * Spell checker code stripped from ispell. At this point, there are
+ * two external routines... the Init routine, and a check-a-word routine
+ * which returns a boolean value, and takes a 16 bit char string.
+ * The code resembles the ispell code as much as possible still.
+ *
+ * Revision 1.43 1994/11/02 06:56:05 geoff
+ * Remove the anyword feature, which I've decided is a bad idea.
+ *
+ * Revision 1.42 1994/10/25 05:45:59 geoff
+ * Add support for an affix that will work with any word, even if there's
+ * no explicit flag.
+ *
+ * Revision 1.41 1994/05/24 06:23:06 geoff
+ * Let tgood decide capitalization questions, rather than doing it ourselves.
+ *
+ * Revision 1.40 1994/05/17 06:44:10 geoff
+ * Add support for controlled compound formation and the COMPOUNDONLY
+ * option to affix flags.
+ *
+ * Revision 1.39 1994/01/25 07:11:31 geoff
+ * Get rid of all old RCS log lines in preparation for the 3.1 release.
+ *
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ispell_checker.h"
+
+
+int good P ((ichar_t * word, int ignoreflagbits, int allhits,
+ int pfxopts, int sfxopts)); /* NOTE(review): free-function prototype, but this file defines ISpellChecker::good -- looks like a leftover, confirm */
+
+#ifndef NO_CAPITALIZATION_SUPPORT
+
+/*!
+** See if this particular capitalization (dent) is legal with these
+** particular affixes.
+**
+** \param dent Dictionary entry (one capitalization variant) whose mask is tested
+** \param hit Candidate match; its prefix and suffix pointers may each be null
+**
+** \return 1 if TSTMASKBIT accepts dent->mask for the flagbit of every affix present in hit, else 0
+*/
+static int entryhasaffixes (struct dent *dent, struct success *hit)
+{
+ if (hit->prefix && !TSTMASKBIT (dent->mask, hit->prefix->flagbit)) /* a prefix is used but not flagged on this entry */
+ return 0;
+ if (hit->suffix && !TSTMASKBIT (dent->mask, hit->suffix->flagbit)) /* a suffix is used but not flagged on this entry */
+ return 0;
+ return 1; /* Yes, these affixes are legal */
+}
+
+/*! Check whether word's capitalization is allowed by some variant of hit->dictent with hit's affixes.
+ * \param word Word as typed; its capitalization class is obtained from whatcap()
+ * \param hit Candidate match: dictionary entry chain plus optional prefix/suffix
+ * \param len Length of word, used to locate the root between prefix and suffix
+ *
+ * \return 1 if a matching variant is found (always 1 for an all-caps word), otherwise 0
+ */
+int ISpellChecker::cap_ok (ichar_t *word, struct success *hit, int len)
+{
+ register ichar_t * dword;
+ register ichar_t * w;
+ register struct dent * dent;
+ ichar_t dentword[INPUTWORDLEN + MAXAFFIXLEN];
+ int preadd; /* leading characters of word that belong to the prefix */
+ int prestrip; /* leading characters of the dictionary word skipped for the prefix */
+ int sufadd; /* trailing characters of word that belong to the suffix */
+ ichar_t * limit;
+ long thiscap;
+ long dentcap;
+
+ thiscap = whatcap (word);
+ /*
+ ** All caps is always legal, regardless of affixes.
+ */
+ preadd = prestrip = sufadd = 0;
+ if (thiscap == ALLCAPS)
+ return 1;
+ else if (thiscap == FOLLOWCASE)
+ {
+ /* Set up some constants for the for(;;) loop below */
+ if (hit->prefix)
+ {
+ preadd = hit->prefix->affl;
+ prestrip = hit->prefix->stripl;
+ }
+ else
+ preadd = prestrip = 0;
+ sufadd = hit->suffix ? hit->suffix->affl : 0;
+ }
+ /*
+ ** Search the variants for one that matches what we have. Note
+ ** that thiscap can't be ALLCAPS, since we already returned
+ ** for that case.
+ */
+ dent = hit->dictent; /* first entry of the variant chain walked below */
+ for ( ; ; )
+ {
+ dentcap = captype (dent->flagfield);
+ if (dentcap != thiscap)
+ {
+ if (dentcap == ANYCASE && thiscap == CAPITALIZED /* an any-case entry also accepts a capitalized word */
+ && entryhasaffixes (dent, hit))
+ return 1;
+ }
+ else /* captypes match */
+ {
+ if (thiscap != FOLLOWCASE)
+ {
+ if (entryhasaffixes (dent, hit))
+ return 1;
+ }
+ else
+ {
+ /*
+ ** Make sure followcase matches exactly.
+ ** Life is made more difficult by the
+ ** possibility of affixes. Start with
+ ** the prefix.
+ */
+ strtoichar (dentword, dent->word, INPUTWORDLEN, 1);
+ dword = dentword;
+ limit = word + preadd; /* end of the prefix characters in word */
+ if (myupper (dword[prestrip])) /* root starts uppercase: reject any lowercase in the prefix */
+ {
+ for (w = word; w < limit; w++)
+ {
+ if (mylower (*w))
+ goto doublecontinue;
+ }
+ }
+ else
+ {
+ for (w = word; w < limit; w++)
+ {
+ if (myupper (*w))
+ goto doublecontinue;
+ }
+ }
+ dword += prestrip;
+ /* Do root part of word */
+ limit = dword + len - preadd - sufadd; /* end of the root characters in dentword */
+ while (dword < limit)
+ {
+ if (*dword++ != *w++)
+ goto doublecontinue;
+ }
+ /* Do suffix */
+ dword = limit - 1; /* case of the last root character governs the suffix */
+ if (myupper (*dword))
+ {
+ for ( ; *w; w++)
+ {
+ if (mylower (*w))
+ goto doublecontinue;
+ }
+ }
+ else
+ {
+ for ( ; *w; w++)
+ {
+ if (myupper (*w))
+ goto doublecontinue;
+ }
+ }
+ /*
+ ** All failure paths go to "doublecontinue,"
+ ** so if we get here it must match.
+ */
+ if (entryhasaffixes (dent, hit))
+ return 1;
+ doublecontinue: ; /* this variant did not match; move on to the next one */
+ }
+ }
+ if ((dent->flagfield & MOREVARIANTS) == 0) /* no further variants in the chain */
+ break;
+ dent = dent->next;
+ }
+
+ /* No matches found */
+ return 0;
+}
+#endif
+
+#ifndef NO_CAPITALIZATION_SUPPORT
+/*! Look up w directly and, unless that settles it, via affix stripping (chk_aff).
+ * \param w Word to look up
+ * \param ignoreflagbits NZ to ignore affix flags in dict
+ * \param allhits NZ to ignore case, get every hit
+ * \param pfxopts Options to apply to prefixes
+ * \param sfxopts Options to apply to suffixes
+ *
+ * \return 1 on an accepted direct hit when allhits is zero; otherwise m_numhits after chk_aff()
+ */
+int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts)
+#else
+/* ARGSUSED */ /* NOTE(review): parameter is named dummy here, yet the body still refers to allhits -- confirm this configuration builds */
+int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int dummy, int pfxopts, int sfxopts)
+#endif
+{
+ ichar_t nword[INPUTWORDLEN + MAXAFFIXLEN]; /* receives the uppercase copy of w */
+ register ichar_t * p;
+ register ichar_t * q;
+ register int n;
+ register struct dent * dp;
+
+ /*
+ ** Make an uppercase copy of the word we are checking.
+ */
+ for (p = w, q = nword; *p; ) /* NOTE: no length check here; w must fit in nword */
+ *q++ = mytoupper (*p++);
+ *q = 0;
+ n = q - nword; /* number of characters copied */
+
+ m_numhits = 0; /* reset the hit count */
+
+ if ((dp = ispell_lookup (nword, 1)) != NULL) /* direct lookup of the uppercase form */
+ {
+ m_hits[0].dictent = dp;
+ m_hits[0].prefix = NULL;
+ m_hits[0].suffix = NULL;
+#ifndef NO_CAPITALIZATION_SUPPORT
+ if (allhits || cap_ok (w, &m_hits[0], n)) /* allhits bypasses the capitalization check */
+ m_numhits = 1;
+#else
+ m_numhits = 1;
+#endif
+ }
+
+ if (m_numhits && !allhits) /* one direct hit is enough unless every hit was requested */
+ return 1;
+
+ /* try stripping off affixes */
+
+ chk_aff (w, nword, n, ignoreflagbits, allhits, pfxopts, sfxopts);
+
+ return m_numhits;
+}
+
+
+
+
diff --git a/kspell2/plugins/ispell/hash.cpp b/kspell2/plugins/ispell/hash.cpp
new file mode 100644
index 000000000..03bd880bb
--- /dev/null
+++ b/kspell2/plugins/ispell/hash.cpp
@@ -0,0 +1,204 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+/*
+ * hash.c - a simple hash function for ispell
+ *
+ * Pace Willisson, 1983
+ *
+ * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ * 4. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgment:
+ * This product includes software developed by Geoff Kuenning and
+ * other unpaid contributors.
+ * 5. The name of Geoff Kuenning may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Log$
+ * Revision 1.1 2004/01/31 16:44:12 zrusin
+ * ISpell plugin.
+ *
+ * Revision 1.4 2003/08/14 17:51:27 dom
+ * update license - exception clause should be Lesser GPL
+ *
+ * Revision 1.3 2003/07/28 20:40:26 dom
+ * fix up the license clause, further win32-registry proof some directory getting functions
+ *
+ * Revision 1.2 2003/07/16 22:52:39 dom
+ * LGPL + exception license
+ *
+ * Revision 1.1 2003/07/15 01:15:05 dom
+ * ispell enchant backend
+ *
+ * Revision 1.2 2003/01/29 05:50:11 hippietrail
+ *
+ * Fixed my mess in EncodingManager.
+ * Changed many C casts to C++ casts.
+ *
+ * Revision 1.1 2003/01/24 05:52:33 hippietrail
+ *
+ * Refactored ispell code. Old ispell global variables had been put into
+ * an allocated structure, a pointer to which was passed to many functions.
+ * I have now made all such functions and variables private members of the
+ * ISpellChecker class. It was C OO, now it's C++ OO.
+ *
+ * I've fixed the makefiles and tested compilation but am unable to test
+ * operation. Please back out my changes if they cause problems which
+ * are not obvious or easy to fix.
+ *
+ * Revision 1.5 2002/09/19 05:31:15 hippietrail
+ *
+ * More Ispell cleanup. Conditional globals and DEREF macros are removed.
+ * K&R function declarations removed, converted to Doxygen style comments
+ * where possible. No code has been changed (I hope). Compiles for me but
+ * unable to test.
+ *
+ * Revision 1.4 2002/09/17 03:03:29 hippietrail
+ *
+ * After seeking permission on the developer list I've reformatted all the
+ * spelling source which seemed to have parts which used 2, 3, 4, and 8
+ * spaces for tabs. It should all look good with our standard 4-space
+ * tabs now.
+ * I've concentrated just on indentation in the actual code. More prettying
+ * could be done.
+ * * NO code changes were made *
+ *
+ * Revision 1.3 2002/09/13 17:20:13 mpritchett
+ * Fix more warnings for Linux build
+ *
+ * Revision 1.2 2001/05/12 16:05:42 thomasf
+ * Big pseudo changes to ispell to make it pass around a structure rather
+ * than rely on all sorts of globals willy nilly here and there. Also
+ * fixed our spelling class to work with accepting suggestions once more.
+ * This code is dirty, gross and ugly (not to mention still not supporting
+ * multiple hash sized just yet) but it works on my machine and will no
+ * doubt break other machines.
+ *
+ * Revision 1.1 2001/04/15 16:01:24 tomas_f
+ * moving to spell/xp
+ *
+ * Revision 1.3 1998/12/29 14:55:33 eric
+ *
+ * I've doctored the ispell code pretty extensively here. It is now
+ * warning-free on Win32. It also *works* on Win32 now, since I
+ * replaced all the I/O calls with ANSI standard ones.
+ *
+ * Revision 1.2 1998/12/28 23:11:30 eric
+ *
+ * modified spell code and integration to build on Windows.
+ * This is still a hack.
+ *
+ * Actually, it doesn't yet WORK on Windows. It just builds.
+ * SpellCheckInit is failing for some reason.
+ *
+ * Revision 1.1 1998/12/28 18:04:43 davet
+ * Spell checker code stripped from ispell. At this point, there are
+ * two external routines... the Init routine, and a check-a-word routine
+ * which returns a boolean value, and takes a 16 bit char string.
+ * The code resembles the ispell code as much as possible still.
+ *
+ * Revision 1.20 1994/01/25 07:11:34 geoff
+ * Get rid of all old RCS log lines in preparation for the 3.1 release.
+ *
+ */
+
+#include "ispell_checker.h"
+
+/*
+ * The following hash algorithm is due to Ian Dall, with slight modifications
+ * by Geoff Kuenning to reflect the results of testing with the English
+ * dictionaries actually distributed with ispell.
+ */
+#define HASHSHIFT 5 /* bits h is shifted left per character in hash() */
+
+#ifdef NO_CAPITALIZATION_SUPPORT
+#define HASHUPPER(c) c /* no case folding: characters are hashed as given */
+#else /* NO_CAPITALIZATION_SUPPORT */
+#define HASHUPPER(c) mytoupper(c) /* hash the mytoupper() form of each character */
+#endif /* NO_CAPITALIZATION_SUPPORT */
+
+/*! Hash the word s to an index for a table of hashtblsize entries.
+ * \param s Zero-terminated ichar_t string; each character passes through HASHUPPER
+ * \param hashtblsize Modulus applied to the final value (the result is h % hashtblsize)
+ */
+int ISpellChecker::hash (ichar_t *s, int hashtblsize)
+{
+ register long h = 0;
+ register int i;
+
+#ifdef ICHAR_IS_CHAR
+ for (i = 4; i-- && *s != 0; ) /* seed h with up to 4 leading characters, 8 bits each */
+ h = (h << 8) | HASHUPPER (*s++);
+#else /* ICHAR_IS_CHAR */
+ for (i = 2; i-- && *s != 0; ) /* seed h with up to 2 leading characters, 16 bits each */
+ h = (h << 16) | HASHUPPER (*s++);
+#endif /* ICHAR_IS_CHAR */
+ while (*s != 0) /* mix in each remaining character */
+ {
+ /*
+ * We have to do circular shifts the hard way, since C doesn't
+ * have them even though the hardware probably does. Oh, well.
+ * NOTE(review): the wrap-around assumes a 32-bit h; where long is wider, bits above 31 are kept -- confirm. */
+ h = (h << HASHSHIFT)
+ | ((h >> (32 - HASHSHIFT)) & ((1 << HASHSHIFT) - 1));
+ h ^= HASHUPPER (*s++);
+ }
+ return static_cast<unsigned long>(h) % hashtblsize;
+}
diff --git a/kspell2/plugins/ispell/ispell.h b/kspell2/plugins/ispell/ispell.h
new file mode 100644
index 000000000..587defc07
--- /dev/null
+++ b/kspell2/plugins/ispell/ispell.h
@@ -0,0 +1,801 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+#ifndef ISPELL_H
+#define ISPELL_H
+
+#include <sys/types.h>
+
+/*
+ * $Id$
+ */
+
+/*
+ * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ * 4. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgment:
+ * This product includes software developed by Geoff Kuenning and
+ * other unpaid contributors.
+ * 5. The name of Geoff Kuenning may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Log$
+ * Revision 1.1 2004/01/31 16:44:12 zrusin
+ * ISpell plugin.
+ *
+ * Revision 1.4 2003/08/14 17:51:27 dom
+ * update license - exception clause should be Lesser GPL
+ *
+ * Revision 1.3 2003/07/28 20:40:26 dom
+ * fix up the license clause, further win32-registry proof some directory getting functions
+ *
+ * Revision 1.2 2003/07/16 22:52:40 dom
+ * LGPL + exception license
+ *
+ * Revision 1.1 2003/07/15 01:15:06 dom
+ * ispell enchant backend
+ *
+ * Revision 1.10 2003/01/24 05:52:33 hippietrail
+ *
+ * Refactored ispell code. Old ispell global variables had been put into
+ * an allocated structure, a pointer to which was passed to many functions.
+ * I have now made all such functions and variables private members of the
+ * ISpellChecker class. It was C OO, now it's C++ OO.
+ *
+ * I've fixed the makefiles and tested compilation but am unable to test
+ * operation. Please back out my changes if they cause problems which
+ * are not obvious or easy to fix.
+ *
+ * Revision 1.9 2002/09/19 05:31:15 hippietrail
+ *
+ * More Ispell cleanup. Conditional globals and DEREF macros are removed.
+ * K&R function declarations removed, converted to Doxygen style comments
+ * where possible. No code has been changed (I hope). Compiles for me but
+ * unable to test.
+ *
+ * Revision 1.8 2002/09/17 03:03:29 hippietrail
+ *
+ * After seeking permission on the developer list I've reformatted all the
+ * spelling source which seemed to have parts which used 2, 3, 4, and 8
+ * spaces for tabs. It should all look good with our standard 4-space
+ * tabs now.
+ * I've concentrated just on indentation in the actual code. More prettying
+ * could be done.
+ * * NO code changes were made *
+ *
+ * Revision 1.7 2002/03/22 14:31:57 dom
+ * fix mg's compile problem
+ *
+ * Revision 1.6 2002/03/05 16:55:52 dom
+ * compound word support, tested against swedish
+ *
+ * Revision 1.5 2001/08/10 18:32:40 dom
+ * Spelling and iconv updates. god, i hate iconv
+ *
+ * Revision 1.4 2001/06/26 16:33:27 dom
+ * 128 StringChars and some other stuff
+ *
+ * Revision 1.3 2001/05/12 16:05:42 thomasf
+ * Big pseudo changes to ispell to make it pass around a structure rather
+ * than rely on all sorts of globals willy nilly here and there. Also
+ * fixed our spelling class to work with accepting suggestions once more.
+ * This code is dirty, gross and ugly (not to mention still not supporting
+ * multiple hash sized just yet) but it works on my machine and will no
+ * doubt break other machines.
+ *
+ * Revision 1.2 2001/04/18 00:59:36 thomasf
+ * Removed the duplicate declarations of variables that was causing build
+ * to bail. This new ispell stuff is a total mess.
+ *
+ * Revision 1.1 2001/04/15 16:01:24 tomas_f
+ * moving to spell/xp
+ *
+ * Revision 1.13 2001/04/13 12:33:12 tamlin
+ * ispell can now be used from C++
+ *
+ * Revision 1.12 2001/03/25 01:30:02 tomb
+ * 1. Fixed ispell #define problems on Win32
+ * 2. Changed the way that togglable toolbars are tracked so that Full
+ * Screen mode works right on Windows
+ * 3. Fixed SET_GATHER macro in ap_Win32Dialog_Options.h
+ * 4. Fixed Toggle Case dialog to default to Sentence Case when loaded
+ * 5. Added #define for Auto Save checkbox (though I haven't updated the
+ * Prefs dialog yet)
+ *
+ * Revision 1.11 2001/03/24 23:28:41 dom
+ * Make C++ aware and watch out for VOID on Win32
+ *
+ * Revision 1.10 1999/12/21 18:46:29 sterwill
+ * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se>
+ *
+ * Revision 1.9 1999/10/20 03:19:35 paul
+ * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more.
+ *
+ * Revision 1.8 1999/09/29 23:33:32 justin
+ * Updates to the underlying ispell-based code to support suggested corrections.
+ *
+ * Revision 1.7 1999/04/13 17:12:51 jeff
+ * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
+ * Fixed crash on Win32 with the new code.
+ *
+ * Revision 1.6 1999/01/07 05:14:22 sterwill
+ * So it builds on Unix... it might break win32 in ispell, since ut_types
+ * is no longer included. This is a temporary solution to a larger problem
+ * of including C++ headers in C source files.
+ *
+ * Revision 1.6 1999/01/07 05:14:22 sterwill
+ * So it builds on Unix... it might break win32 in ispell, since ut_types
+ * is no longer included. This is a temporary solution to a larger problem
+ * of including C++ headers in C source files.
+ *
+ * Revision 1.5 1999/01/07 05:02:25 sterwill
+ * Checking in half-broken to avoid tree lossage
+ *
+ * Revision 1.4 1999/01/07 01:07:48 paul
+ * Fixed spell leaks.
+ *
+ * Revision 1.3 1998/12/29 15:03:54 eric
+ *
+ * minor fix to ispell.h to get things to compile on Linux again.
+ *
+ * Revision 1.2 1998/12/29 14:55:33 eric
+ *
+ * I've doctored the ispell code pretty extensively here. It is now
+ * warning-free on Win32. It also *works* on Win32 now, since I
+ * replaced all the I/O calls with ANSI standard ones.
+ *
+ * Revision 1.1 1998/12/28 18:04:43 davet
+ * Spell checker code stripped from ispell. At this point, there are
+ * two external routines... the Init routine, and a check-a-word routine
+ * which returns a boolean value, and takes a 16 bit char string.
+ * The code resembles the ispell code as much as possible still.
+ *
+ * Revision 1.68 1995/03/06 02:42:41 geoff
+ * Be vastly more paranoid about parenthesizing macro arguments. This
+ * fixes a bug in defmt.c where a complex argument was passed to
+ * isstringch.
+ *
+ * Revision 1.67 1995/01/03 19:24:12 geoff
+ * Get rid of a non-global declaration.
+ *
+ * Revision 1.66 1994/12/27 23:08:49 geoff
+ * Fix a lot of subtly bad assumptions about the widths of ints and longs
+ * which only show up on 64-bit machines like the Cray and the DEC Alpha.
+ *
+ * Revision 1.65 1994/11/02 06:56:10 geoff
+ * Remove the anyword feature, which I've decided is a bad idea.
+ *
+ * Revision 1.64 1994/10/25 05:46:18 geoff
+ * Add the FF_ANYWORD flag for defining an affix that will apply to any
+ * word, even if not explicitly specified. (Good for French.)
+ *
+ * Revision 1.63 1994/09/16 04:48:28 geoff
+ * Make stringdups and laststringch unsigned ints, and dupnos a plain
+ * int, so that we can handle more than 128 stringchars and stringchar
+ * types.
+ *
+ * Revision 1.62 1994/09/01 06:06:39 geoff
+ * Change erasechar/killchar to uerasechar/ukillchar to avoid
+ * shared-library problems on HP systems.
+ *
+ * Revision 1.61 1994/08/31 05:58:35 geoff
+ * Add contextoffset, used in -a mode to handle extremely long lines.
+ *
+ * Revision 1.60 1994/05/17 06:44:15 geoff
+ * Add support for controlled compound formation and the COMPOUNDONLY
+ * option to affix flags.
+ *
+ * Revision 1.59 1994/03/15 06:25:16 geoff
+ * Change deftflag's initialization so we can tell if -t/-n appeared.
+ *
+ * Revision 1.58 1994/02/07 05:53:28 geoff
+ * Add typecasts to the 7-bit versions of ichar* routines
+ *
+ * Revision 1.57 1994/01/25 07:11:48 geoff
+ * Get rid of all old RCS log lines in preparation for the 3.1 release.
+ *
+ */
+
+#include <stdio.h>
+/* #include "ut_types.h" */
+
+#include "ispell_def.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* c++ */
+
+/* largest amount that a word might be extended by adding affixes */
+#ifndef MAXAFFIXLEN
+#define MAXAFFIXLEN 20
+#endif
+
+/*
+** Number of mask bits (affix flags) supported. Must be 32, 64, 128, or
+** 256. If MASKBITS is 32 or 64, there are really only 26 or 58 flags
+** available, respectively. If it is 32, the flags are named with the
+** 26 English uppercase letters; lowercase will be converted to uppercase.
+** If MASKBITS is 64, the 58 flags are named 'A' through 'z' in ASCII
+** order, including the 6 special characters from 'Z' to 'a': "[\]^_`".
+** If MASKBITS is 128 or 256, all the 7-bit or 8-bit characters,
+** respectively, are theoretically available, though a few (newline, slash,
+** null byte) are pretty hard to actually use successfully.
+**
+** Note that a number of non-English affix files depend on having a
+** larger value for MASKBITS. See the affix files for more
+** information.
+*/
+
+#ifndef MASKBITS
+#define MASKBITS 64
+#endif
+
+extern int gnMaskBits;
+
+/*
+** C type to use for masks. This should be a type that the processor
+** accesses efficiently.
+**
+** MASKTYPE_WIDTH must correctly reflect the number of bits in a
+** MASKTYPE. Unfortunately, it is also required to be a constant at
+** preprocessor time, which means you can't use the sizeof operator to
+** define it.
+**
+** Note that MASKTYPE *must* match MASKTYPE_WIDTH or you may get
+** division-by-zero errors!
+*/
+#ifndef MASKTYPE
+#define MASKTYPE long
+#endif
+#ifndef MASKTYPE_WIDTH
+#define MASKTYPE_WIDTH 32
+#endif
+
+ /* program: this should be coded now in init */
+
+#if MASKBITS < MASKTYPE_WIDTH
+#undef MASKBITS
+#define MASKBITS MASKTYPE_WIDTH
+#endif /* MASKBITS < MASKTYPE_WIDTH */
+
+/*
+** Maximum hash table fullness percentage. Larger numbers trade space
+** for time.
+**/
+#ifndef MAXPCT
+#define MAXPCT 70 /* Expand table when 70% full */
+#endif
+
+/*
+** Maximum number of "string" characters that can be defined in a
+** language (affix) file. Don't forget that an upper/lower string
+** character counts as two!
+*/
+#ifndef MAXSTRINGCHARS
+#define MAXSTRINGCHARS 128
+#endif /* MAXSTRINGCHARS */
+
+/*
+** Maximum length of a "string" character. The default is appropriate for
+** nroff-style characters starting with a backslash.
+*/
+#ifndef MAXSTRINGCHARLEN
+#define MAXSTRINGCHARLEN 10
+#endif /* MAXSTRINGCHARLEN */
+
+/*
+** Maximum number of "hits" expected on a word. This is basically the
+** number of different ways different affixes can produce the same word.
+** For example, with "english.aff", "brothers" can be produced 3 ways:
+** "brothers," "brother+s", or "broth+ers". If this is too low, no major
+** harm will be done, but ispell may occasionally forget a capitalization.
+*/
+#ifndef MAX_HITS
+#define MAX_HITS 10
+#endif
+
+/*
+** Maximum number of capitalization variations expected in any word.
+** Besides the obvious all-lower, all-upper, and capitalized versions,
+** this includes followcase variants. If this is too low, no real
+** harm will be done, but ispell may occasionally fail to suggest a
+** correct capitalization.
+*/
+#ifndef MAX_CAPS
+#define MAX_CAPS 10
+#endif /* MAX_CAPS */
+
+/* buffer size to use for file names if not in sys/param.h */
+#ifndef MAXPATHLEN
+#define MAXPATHLEN 512
+#endif
+
+/*
+** Maximum language-table search size. Smaller numbers make ispell
+** run faster, at the expense of more memory (the lowest reasonable value
+** is 2). If a given character appears in a significant position in
+** more than MAXSEARCH suffixes, it will be given its own index table.
+** If you change this, define INDEXDUMP in lookup.c to be sure your
+** index table looks reasonable.
+*/
+#ifndef MAXSEARCH
+#define MAXSEARCH 4
+#endif
+
+#if defined(__STDC__) || defined(__cplusplus)
+#define P(x) x
+ #ifndef VOID
+ #define VOID void
+ #endif
+#else /* __STDC__ */
+#define P(x) ()
+ #ifndef VOID
+ #define VOID char
+ #endif
+#define const
+#endif /* __STDC__ */
+
+#ifdef NO8BIT
+#define SET_SIZE 128
+#else
+#define SET_SIZE 256
+#endif
+
+#define MASKSIZE (gnMaskBits / MASKTYPE_WIDTH)
+
+#ifdef lint
+extern int TSTMASKBIT P ((MASKTYPE * mask, int bit));
+#else /* lint */
+/* The following is really testing for MASKSIZE <= 1, but cpp can't do that */
+#define TSTMASKBIT(mask, bit) \
+ ((mask)[(bit) / MASKTYPE_WIDTH] & \
+ ((MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1))))
+#endif /* lint */
+
+#if MASKBITS > 64
+#define FULLMASKSET
+#endif
+
+#if MASKBITS <= 32
+ #define FLAGBASE ((MASKTYPE_WIDTH) - 6)
+#else
+ # if MASKBITS <= 64
+ #define FLAGBASE ((MASKTYPE_WIDTH) - 6)
+ # else
+ #define FLAGBASE 0
+ # endif
+#endif
+
+/*
+** Data type for internal word storage. If necessary, we use shorts rather
+** than chars so that string characters can be encoded as a single unit.
+*/
+#if (SET_SIZE + MAXSTRINGCHARS) <= 256
+#ifndef lint
+#define ICHAR_IS_CHAR
+#endif /* lint */
+#endif
+
+#ifdef ICHAR_IS_CHAR
+typedef unsigned char ichar_t; /* Internal character */
+#define icharlen(s) strlen ((char *) (s))
+#define icharcpy(a, b) strcpy ((char *) (a), (char *) (b))
+#define icharcmp(a, b) strcmp ((char *) (a), (char *) (b))
+#define icharncmp(a, b, n) strncmp ((char *) (a), (char *) (b), (n))
+#define chartoichar(x) ((ichar_t) (x))
+#else
+typedef unsigned short ichar_t; /* Internal character */
+#define chartoichar(x) ((ichar_t) (unsigned char) (x))
+
+/*
+ * Structure used to record data about successful lookups; these values
+ * are used in the ins_root_cap routine to produce correct capitalizations.
+ *
+ * Note that this declaration sits inside the "#else" arm of the
+ * ICHAR_IS_CHAR test above, so it is only declared when ichar_t is an
+ * unsigned short.
+ */
+struct success
+{
+ struct dent * dictent; /* Header of dict entry chain for wd */
+ struct flagent * prefix; /* Prefix flag used, or NULL */
+ struct flagent * suffix; /* Suffix flag used, or NULL */
+};
+
+ichar_t* icharcpy (ichar_t* out, ichar_t* in);
+int icharlen (ichar_t* in);
+int icharcmp (ichar_t* s1, ichar_t* s2);
+int icharncmp (ichar_t* s1, ichar_t* s2, int n);
+
+#endif
+
+/*
+ * One dictionary entry.  The comment that follows this structure explains
+ * how the flag bits are stored and how the entries for several
+ * capitalizations of one word are linked together.
+ */
+struct dent
+{
+ struct dent * next; /* Next entry; links capitalization variants */
+ char * word; /* Spelling of the word; may be null for variant entries */
+ MASKTYPE mask[2]; /* Affix flag bits; also holds the entry flags unless FULLMASKSET */
+#ifdef FULLMASKSET
+ char flags; /* Entry flags (USED, KEEP, ...) when FULLMASKSET is defined */
+#endif
+};
+
+/*
+** Flags in the directory entry. If FULLMASKSET is undefined, these are
+** stored in the highest bits of the last longword of the mask field. If
+** FULLMASKSET is defined, they are stored in the extra "flags" field.
+#ifndef NO_CAPITALIZATION_SUPPORT
+**
+** If a word has only one capitalization form, and that form is not
+** FOLLOWCASE, it will have exactly one entry in the dictionary. The
+** legal capitalizations will be indicated by the 2-bit capitalization
+** field, as follows:
+**
+** ALLCAPS The word must appear in all capitals.
+** CAPITALIZED The word must be capitalized (e.g., London).
+** It will also be accepted in all capitals.
+** ANYCASE The word may appear in lowercase, capitalized,
+** or all-capitals.
+**
+** Regardless of the capitalization flags, the "word" field of the entry
+** will point to an all-uppercase copy of the word. This is to simplify
+** the large portion of the code that doesn't care about capitalization.
+** Ispell will generate the correct version when needed.
+**
+** If a word has more than one capitalization, there will be multiple
+** entries for it, linked together by the "next" field. The initial
+** entry for such words will be a dummy entry, primarily for use by code
+** that ignores capitalization. The "word" field of this entry will
+** again point to an all-uppercase copy of the word. The "mask" field
+** will contain the logical OR of the mask fields of all variants.
+** A header entry is indicated by a capitalization type of ALLCAPS,
+** with the MOREVARIANTS bit set.
+**
+** The following entries will define the individual variants. Each
+** entry except the last has the MOREVARIANTS flag set, and each
+** contains one of the following capitalization options:
+**
+** ALLCAPS The word must appear in all capitals.
+** CAPITALIZED The word must be capitalized (e.g., London).
+** It will also be accepted in all capitals.
+** FOLLOWCASE The word must be capitalized exactly like the
+** sample in the entry. Prefix (suffix) characters
+** must be rendered in the case of the first (last)
+** "alphabetic" character. It will also be accepted
+** in all capitals. ("Alphabetic" means "mentioned
+** in a 'casechars' statement".)
+** ANYCASE The word may appear in lowercase, capitalized,
+** or all-capitals.
+**
+** The "mask" field for the entry contains only the affix flag bits that
+** are legal for that capitalization. The "word" field will be null
+** except for FOLLOWCASE entries, where it will point to the
+** correctly-capitalized spelling of the root word.
+**
+** It is worth discussing why the ALLCAPS option is used in
+** the header entry. The header entry accepts an all-capitals
+** version of the root plus every affix (this is always legal, since
+** words get capitalized in headers and so forth). Further, all of
+** the following variant entries will reject any all-capitals form
+** that is illegal due to an affix.
+**
+** Finally, note that variations in the KEEP flag can cause a multiple-variant
+** entry as well. For example, if the personal dictionary contains "ALPHA",
+** (KEEP flag set) and the user adds "alpha" with the KEEP flag clear, a
+** multiple-variant entry will be created so that "alpha" will be accepted
+** but only "ALPHA" will actually be kept.
+#endif
+*/
+#ifdef FULLMASKSET
+#define flagfield flags
+#else
+#define flagfield mask[1]
+#endif
+#define USED ((MASKTYPE) 1 << (FLAGBASE + 0))
+#define KEEP ((MASKTYPE) 1 << (FLAGBASE + 1))
+#ifdef NO_CAPITALIZATION_SUPPORT
+#define ALLFLAGS (USED | KEEP)
+#else /* NO_CAPITALIZATION_SUPPORT */
+#define ANYCASE ((MASKTYPE) 0 << (FLAGBASE + 2))
+#define ALLCAPS ((MASKTYPE) 1 << (FLAGBASE + 2))
+#define CAPITALIZED ((MASKTYPE) 2 << (FLAGBASE + 2))
+#define FOLLOWCASE ((MASKTYPE) 3 << (FLAGBASE + 2))
+#define CAPTYPEMASK ((MASKTYPE) 3 << (FLAGBASE + 2))
+#define MOREVARIANTS ((MASKTYPE) 1 << (FLAGBASE + 4))
+#define ALLFLAGS (USED | KEEP | CAPTYPEMASK | MOREVARIANTS)
+#define captype(x) ((x) & CAPTYPEMASK)
+#endif /* NO_CAPITALIZATION_SUPPORT */
+
+/*
+ * Language tables used to encode prefix and suffix information.
+ */
+/* One prefix or suffix table entry. */
+struct flagent
+{
+ ichar_t * strip; /* String to strip off */
+ ichar_t * affix; /* Affix to append */
+ short flagbit; /* Flag bit this ent matches */
+ short stripl; /* Length of strip */
+ short affl; /* Length of affix */
+ short numconds; /* Number of char conditions */
+ short flagflags; /* Modifiers on this flag (FF_* bits) */
+ char conds[SET_SIZE + MAXSTRINGCHARS]; /* Adj. char conds; one slot per character and string character */
+};
+
+/*
+ * Bits in flagflags
+ */
+#define FF_CROSSPRODUCT (1 << 0) /* Affix does cross-products */
+#define FF_COMPOUNDONLY (1 << 1) /* Afx works in compounds */
+
+/*
+ * Payload of a struct flagptr: either a pointer to a further index table
+ * or a pointer to a list of affix entries.  Which member is valid is
+ * decided by the "numents" field of the enclosing struct flagptr.
+ */
+union ptr_union /* Aid for building flg ptrs */
+{
+ struct flagptr * fp; /* Pointer to more indexing */
+ struct flagent * ent; /* First of a list of ents */
+};
+
+/*
+ * One slot of an affix index table.  When numents is zero, pu.fp is the
+ * valid member; otherwise pu.ent presumably points at numents entries
+ * (TODO confirm against the lookup code).
+ */
+struct flagptr
+{
+ union ptr_union pu; /* Ent list or more indexes */
+ int numents; /* If zero, pu.fp is valid */
+};
+
+/*
+ * Description of a single string character type.
+ * All three members are plain C strings owned elsewhere; this structure
+ * only stores pointers.
+ */
+struct strchartype
+{
+ char * name; /* Name of the type */
+ char * deformatter; /* Deformatter to use */
+ char * suffixes; /* File suffixes, null seps (presumably NUL-separated; TODO confirm) */
+};
+
+/*
+ * Header placed at the beginning of the hash file.
+ *
+ * NOTE(review): since this describes data stored in the hash file, the
+ * order and sizes of the fields presumably have to match the files
+ * produced by the hash builder -- confirm before reordering or resizing
+ * anything here.  "magic" and "magic2" bracket the header and are meant
+ * to be compared against MAGIC.
+ */
+struct hashheader
+{
+ unsigned short magic; /* Magic number for ID */
+ unsigned short compileoptions; /* How we were compiled */
+ short maxstringchars; /* Max # strchrs we support */
+ short maxstringcharlen; /* Max strchr len supported */
+ short compoundmin; /* Min lth of compound parts */
+ short compoundbit; /* Flag 4 compounding roots */
+ int stringsize; /* Size of string table */
+ int lstringsize; /* Size of lang. str tbl */
+ int tblsize; /* No. entries in hash tbl */
+ int stblsize; /* No. entries in sfx tbl */
+ int ptblsize; /* No. entries in pfx tbl */
+ int sortval; /* Largest sort ID assigned */
+ int nstrchars; /* No. strchars defined */
+ int nstrchartype; /* No. strchar types */
+ int strtypestart; /* Start of strtype table */
+ char nrchars[5]; /* Nroff special characters */
+ char texchars[13]; /* TeX special characters */
+ char compoundflag; /* Compound-word handling */
+ char defhardflag; /* Default tryveryhard flag */
+ char flagmarker; /* "Start-of-flags" char */
+ unsigned short sortorder[SET_SIZE + MAXSTRINGCHARS]; /* Sort ordering */
+ ichar_t lowerconv[SET_SIZE + MAXSTRINGCHARS]; /* Lower-conversion table */
+ ichar_t upperconv[SET_SIZE + MAXSTRINGCHARS]; /* Upper-conversion table */
+ char wordchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for chars found in wrds */
+ char upperchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for uppercase chars */
+ char lowerchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for lowercase chars */
+ char boundarychars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for boundary chars */
+ char stringstarts[SET_SIZE]; /* NZ if char can start str */
+ char stringchars[MAXSTRINGCHARS][MAXSTRINGCHARLEN + 1]; /* String chars */
+ unsigned int stringdups[MAXSTRINGCHARS]; /* No. of "base" char */
+ int dupnos[MAXSTRINGCHARS]; /* Dup char ID # */
+ unsigned short magic2; /* Second magic for dbl chk */
+};
+
+/* hash table magic number */
+#define MAGIC 0x9602
+
+/* compile options, put in the hash header for consistency checking */
+#ifdef NO8BIT
+# define MAGIC8BIT 0x01
+#else
+# define MAGIC8BIT 0x00
+#endif
+#ifdef NO_CAPITALIZATION_SUPPORT
+# define MAGICCAPITALIZATION 0x00
+#else
+# define MAGICCAPITALIZATION 0x02
+#endif
+/*
+ * Encode the mask-set size in the compile options.  Exactly one arm of
+ * the conditional defines MAGICMASKSET, so the macro is never redefined
+ * with a conflicting value (0x00: <= 32 bits, 0x04: <= 64 bits,
+ * 0x08: <= 128 bits, 0x0C: anything larger).
+ */
+#if MASKBITS <= 32
+# define MAGICMASKSET 0x00
+#else
+# if MASKBITS <= 64
+# define MAGICMASKSET 0x04
+# else
+# if MASKBITS <= 128
+# define MAGICMASKSET 0x08
+# else
+# define MAGICMASKSET 0x0C
+# endif
+# endif
+#endif
+
+#define COMPILEOPTIONS (MAGIC8BIT | MAGICCAPITALIZATION | MAGICMASKSET)
+
+/*
+** Offsets into the nroff special-character array
+*/
+#define NRLEFTPAREN hashheader.nrchars[0]
+#define NRRIGHTPAREN hashheader.nrchars[1]
+#define NRDOT hashheader.nrchars[2]
+#define NRBACKSLASH hashheader.nrchars[3]
+#define NRSTAR hashheader.nrchars[4]
+
+/*
+** Offsets into the TeX special-character array
+*/
+#define TEXLEFTPAREN hashheader.texchars[0]
+#define TEXRIGHTPAREN hashheader.texchars[1]
+#define TEXLEFTSQUARE hashheader.texchars[2]
+#define TEXRIGHTSQUARE hashheader.texchars[3]
+#define TEXLEFTCURLY hashheader.texchars[4]
+#define TEXRIGHTCURLY hashheader.texchars[5]
+#define TEXLEFTANGLE hashheader.texchars[6]
+#define TEXRIGHTANGLE hashheader.texchars[7]
+#define TEXBACKSLASH hashheader.texchars[8]
+#define TEXDOLLAR hashheader.texchars[9]
+#define TEXSTAR hashheader.texchars[10]
+#define TEXDOT hashheader.texchars[11]
+#define TEXPERCENT hashheader.texchars[12]
+
+/*
+** Values for compoundflag
+*/
+#define COMPOUND_NEVER 0 /* Compound words are never good */
+#define COMPOUND_ANYTIME 1 /* Accept run-together words */
+#define COMPOUND_CONTROLLED 2 /* Compounds controlled by afx flags */
+/*
+** These macros are similar to the ones above, but they take into account
+** the possibility of string characters. Note well that they take a POINTER,
+** not a character.
+**
+** The "l_" versions set "len" to the length of the string character as a
+** handy side effect. (Note that the global "laststringch" is also set,
+** and sometimes used, by these macros.)
+**
+** The "l1_" versions go one step further and guarantee that the "len"
+** field is valid for *all* characters, being set to 1 even if the macro
+** returns false. This macro is a great example of how NOT to write
+** readable C.
+*/
+/*TF NOTE: This is actually defined in code (makedent) now */
+#if 0
+#define isstringch(ptr, canon) (isstringstart (*(ptr)) \
+ && stringcharlen ((ptr), (canon)) > 0)
+#define l_isstringch(ptr, len, canon) \
+ (isstringstart (*(ptr)) \
+ && (len = stringcharlen ((ptr), (canon))) \
+ > 0)
+#define l1_isstringch(ptr, len, canon) \
+ (len = 1, \
+ isstringstart ((unsigned char)(*(ptr))) \
+ && ((len = \
+ stringcharlen ((ptr), (canon))) \
+ > 0 \
+ ? 1 : (len = 1, 0)))
+#endif
+
+/*
+ * Sizes of buffers returned by ichartosstr/strtosichar.
+ */
+#define ICHARTOSSTR_SIZE (INPUTWORDLEN + 4 * MAXAFFIXLEN + 4)
+#define STRTOSICHAR_SIZE ((INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) \
+ * sizeof (ichar_t))
+/* TF CHANGE: We should fill this as a structure
+ and then use it throughout.
+*/
+
+/*
+ * Initialized variables. These are generated using macros so that they
+ * may be consistently declared in all programs. Numerous examples of
+ * usage are given below.
+ */
+#ifdef MAIN
+#define INIT(decl, init) decl = init
+#else
+#define INIT(decl, init) extern decl
+#endif
+
+#ifdef MINIMENU
+INIT (int minimenusize, 2); /* MUST be either 2 or zero */
+#else /* MINIMENU */
+INIT (int minimenusize, 0); /* MUST be either 2 or zero */
+#endif /* MINIMENU */
+
+INIT (int eflag, 0); /* NZ for expand mode */
+INIT (int dumpflag, 0); /* NZ to do dump mode */
+INIT (int fflag, 0); /* NZ if -f specified */
+#ifndef USG
+INIT (int sflag, 0); /* NZ to stop self after EOF */
+#endif
+INIT (int vflag, 0); /* NZ to display characters as M-xxx */
+INIT (int xflag, DEFNOBACKUPFLAG); /* NZ to suppress backups */
+INIT (int deftflag, -1); /* NZ for TeX mode by default */
+INIT (int tflag, DEFTEXFLAG); /* NZ for TeX mode in current file */
+INIT (int prefstringchar, -1); /* Preferred string character type */
+
+INIT (int terse, 0); /* NZ for "terse" mode */
+
+INIT (char tempfile[MAXPATHLEN], ""); /* Name of file we're spelling into */
+
+INIT (int minword, MINWORD); /* Longest always-legal word */
+INIT (int sortit, 1); /* Sort suggestions alphabetically */
+INIT (int compoundflag, -1); /* How to treat compounds: see above */
+INIT (int tryhardflag, -1); /* Always call tryveryhard */
+
+INIT (char * currentfile, NULL); /* Name of current input file */
+
+/* Odd numbers for math mode in LaTeX; even for LR or paragraph mode */
+INIT (int math_mode, 0);
+/* P -- paragraph or LR mode
+ * b -- parsing a \begin statement
+ * e -- parsing an \end statement
+ * r -- parsing a \ref type of argument.
+ * m -- looking for a \begin{minipage} argument.
+ */
+INIT (char LaTeX_Mode, 'P');
+
+#ifdef __cplusplus
+}
+#endif /* c++ */
+
+#endif /* ISPELL_H */
diff --git a/kspell2/plugins/ispell/ispell_checker.cpp b/kspell2/plugins/ispell/ispell_checker.cpp
new file mode 100644
index 000000000..42cc2c460
--- /dev/null
+++ b/kspell2/plugins/ispell/ispell_checker.cpp
@@ -0,0 +1,505 @@
+/* vim: set sw=8: -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* kspell2 - adopted from Enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ * Copyright (C) 2004 Zack Rusin <zack@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+#include "sp_spell.h"
+#include "ispell_checker.h"
+
+#include <qmap.h>
+#include <qdir.h>
+#include <qfileinfo.h>
+
+/***************************************************************************/
+
+/* One row of the language table: which hash file and encoding serve a language tag. */
+struct str_ispell_map
+{
+	const char * lang;	/* Language tag, e.g. "en_US" */
+	const char * dict;	/* File name of the ispell hash file */
+	const char * enc;	/* Name of the character encoding used by that file */
+};
+typedef struct str_ispell_map IspellMap;
+
+/*
+ * Directories that are searched, in this order, for ispell hash files.
+ * The list is terminated by a null pointer; the loops that walk it stop
+ * at that sentinel.
+ */
+static const char *ispell_dirs [] = {
+ "/usr/lib/ispell",
+ "/usr/local/lib/ispell",
+ "/usr/local/share/ispell",
+ "/usr/share/ispell",
+ "/usr/pkg/lib",
+ 0
+};
+static const IspellMap ispell_map [] = {
+ {"ca" ,"catala.hash" ,"iso-8859-1" },
+ {"ca_ES" ,"catala.hash" ,"iso-8859-1" },
+ {"cs" ,"czech.hash" ,"iso-8859-2" },
+ {"cs_CZ" ,"czech.hash" ,"iso-8859-2" },
+ {"da" ,"dansk.hash" ,"iso-8859-1" },
+ {"da_DK" ,"dansk.hash" ,"iso-8859-1" },
+ {"de" ,"deutsch.hash" ,"iso-8859-1" },
+ {"de_CH" ,"swiss.hash" ,"iso-8859-1" },
+ {"de_AT" ,"deutsch.hash" ,"iso-8859-1" },
+ {"de_DE" ,"deutsch.hash" ,"iso-8859-1" },
+ {"el" ,"ellhnika.hash" ,"iso-8859-7" },
+ {"el_GR" ,"ellhnika.hash" ,"iso-8859-7" },
+ {"en" ,"british.hash" ,"iso-8859-1" },
+ {"en_AU" ,"british.hash" ,"iso-8859-1" },
+ {"en_BZ" ,"british.hash" ,"iso-8859-1" },
+ {"en_CA" ,"british.hash" ,"iso-8859-1" },
+ {"en_GB" ,"british.hash" ,"iso-8859-1" },
+ {"en_IE" ,"british.hash" ,"iso-8859-1" },
+ {"en_JM" ,"british.hash" ,"iso-8859-1" },
+ {"en_NZ" ,"british.hash" ,"iso-8859-1" },
+ {"en_TT" ,"british.hash" ,"iso-8859-1" },
+ {"en_ZA" ,"british.hash" ,"iso-8859-1" },
+ {"en_ZW" ,"british.hash" ,"iso-8859-1" },
+ {"en_PH" ,"american.hash" ,"iso-8859-1" },
+ {"en_US" ,"american.hash" ,"iso-8859-1" },
+ {"eo" ,"esperanto.hash" ,"iso-8859-3" },
+ {"es" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_AR" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_BO" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_CL" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_CO" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_CR" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_DO" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_EC" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_ES" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_GT" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_HN" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_MX" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_NI" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_PA" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_PE" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_PR" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_PY" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_SV" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_UY" ,"espanol.hash" ,"iso-8859-1" },
+ {"es_VE" ,"espanol.hash" ,"iso-8859-1" },
+ {"fi" ,"finnish.hash" ,"iso-8859-1" },
+ {"fi_FI" ,"finnish.hash" ,"iso-8859-1" },
+ {"fr" ,"francais.hash" ,"iso-8859-1" },
+ {"fr_BE" ,"francais.hash" ,"iso-8859-1" },
+ {"fr_CA" ,"francais.hash" ,"iso-8859-1" },
+ {"fr_CH" ,"francais.hash" ,"iso-8859-1" },
+ {"fr_FR" ,"francais.hash" ,"iso-8859-1" },
+ {"fr_LU" ,"francais.hash" ,"iso-8859-1" },
+ {"fr_MC" ,"francais.hash" ,"iso-8859-1" },
+ {"hu" ,"hungarian.hash" ,"iso-8859-2" },
+ {"hu_HU" ,"hungarian.hash" ,"iso-8859-2" },
+ {"ga" ,"irish.hash" ,"iso-8859-1" },
+ {"ga_IE" ,"irish.hash" ,"iso-8859-1" },
+ {"gl" ,"galician.hash" ,"iso-8859-1" },
+ {"gl_ES" ,"galician.hash" ,"iso-8859-1" },
+ {"ia" ,"interlingua.hash" ,"iso-8859-1" },
+ {"it" ,"italian.hash" ,"iso-8859-1" },
+ {"it_IT" ,"italian.hash" ,"iso-8859-1" },
+ {"it_CH" ,"italian.hash" ,"iso-8859-1" },
+ {"la" ,"mlatin.hash" ,"iso-8859-1" },
+ {"la_IT" ,"mlatin.hash" ,"iso-8859-1" },
+ {"lt" ,"lietuviu.hash" ,"iso-8859-13" },
+ {"lt_LT" ,"lietuviu.hash" ,"iso-8859-13" },
+ {"nl" ,"nederlands.hash" ,"iso-8859-1" },
+ {"nl_NL" ,"nederlands.hash" ,"iso-8859-1" },
+ {"nl_BE" ,"nederlands.hash" ,"iso-8859-1" },
+ {"nb" ,"norsk.hash" ,"iso-8859-1" },
+ {"nb_NO" ,"norsk.hash" ,"iso-8859-1" },
+ {"nn" ,"nynorsk.hash" ,"iso-8859-1" },
+ {"nn_NO" ,"nynorsk.hash" ,"iso-8859-1" },
+ {"no" ,"norsk.hash" ,"iso-8859-1" },
+ {"no_NO" ,"norsk.hash" ,"iso-8859-1" },
+ {"pl" ,"polish.hash" ,"iso-8859-2" },
+ {"pl_PL" ,"polish.hash" ,"iso-8859-2" },
+ {"pt" ,"brazilian.hash" ,"iso-8859-1" },
+ {"pt_BR" ,"brazilian.hash" ,"iso-8859-1" },
+ {"pt_PT" ,"portugues.hash" ,"iso-8859-1" },
+ {"ru" ,"russian.hash" ,"koi8-r" },
+ {"ru_MD" ,"russian.hash" ,"koi8-r" },
+ {"ru_RU" ,"russian.hash" ,"koi8-r" },
+ {"sc" ,"sardinian.hash" ,"iso-8859-1" },
+ {"sc_IT" ,"sardinian.hash" ,"iso-8859-1" },
+ {"sk" ,"slovak.hash" ,"iso-8859-2" },
+ {"sk_SK" ,"slovak.hash" ,"iso-8859-2" },
+ {"sl" ,"slovensko.hash" ,"iso-8859-2" },
+ {"sl_SI" ,"slovensko.hash" ,"iso-8859-2" },
+ {"sv" ,"svenska.hash" ,"iso-8859-1" },
+ {"sv_SE" ,"svenska.hash" ,"iso-8859-1" },
+ {"uk" ,"ukrainian.hash" ,"koi8-u" },
+ {"uk_UA" ,"ukrainian.hash" ,"koi8-u" },
+ {"yi" ,"yiddish-yivo.hash" ,"utf-8" }
+};
+
+static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) );
+static QMap<QString, QString> ispell_dict_map;
+
+
+/*
+ * Select the input codec by name.
+ *
+ * A null or empty name leaves m_translate_in untouched; otherwise it is
+ * set to whatever QTextCodec::codecForName() returns for that name.
+ */
+void
+ISpellChecker::try_autodetect_charset(const char * const inEncoding)
+{
+	if (!inEncoding || inEncoding[0] == '\0')
+		return;
+
+	m_translate_in = QTextCodec::codecForName(inEncoding);
+}
+
+/***************************************************************************/
+/***************************************************************************/
+
+/*
+ * Construct an unloaded checker: every table pointer starts out null, no
+ * codec is selected, and m_bSuccessfulInit stays false until
+ * requestDictionary() succeeds.
+ */
+ISpellChecker::ISpellChecker()
+ : deftflag(-1),
+ prefstringchar(-1),
+ m_bSuccessfulInit(false),
+ m_BC(NULL),
+ m_cd(NULL),
+ m_cl(NULL),
+ m_cm(NULL),
+ m_ho(NULL),
+ m_nd(NULL),
+ m_so(NULL),
+ m_se(NULL),
+ m_ti(NULL),
+ m_te(NULL),
+ m_hashstrings(NULL),
+ m_hashtbl(NULL),
+ m_pflaglist(NULL),
+ m_sflaglist(NULL),
+ m_chartypes(NULL),
+ m_infile(NULL),
+ m_outfile(NULL),
+ m_askfilename(NULL),
+ m_Trynum(0),
+ m_translate_in(0)
+{
+ /* Zero both flag index arrays so they start out empty. */
+ memset(m_sflagindex,0,sizeof(m_sflagindex));
+ memset(m_pflagindex,0,sizeof(m_pflagindex));
+}
+
+#ifndef FREEP
+#define FREEP(p) do { if (p) free(p); } while (0)
+#endif
+
+/*
+ * Release the tables owned by this checker.
+ *
+ * The flag indexes are only cleared when a dictionary was loaded
+ * successfully; the malloc'ed tables are freed unconditionally (FREEP
+ * ignores null pointers).
+ */
+ISpellChecker::~ISpellChecker()
+{
+	if (m_bSuccessfulInit) {
+		// only cleanup our mess if we were successfully initialized
+		clearindex (m_pflagindex);
+		clearindex (m_sflagindex);
+	}
+
+	FREEP(m_hashtbl);
+	FREEP(m_hashstrings);
+	FREEP(m_sflaglist);
+	FREEP(m_chartypes);
+
+	// Every assignment to m_translate_in in this file comes from
+	// QTextCodec::codecForName(), so the codec belongs to Qt and is shared
+	// with the rest of the application.  Deleting it here would pull the
+	// codec out from under other users; only drop our pointer.
+	m_translate_in = 0;
+}
+
+/*
+ * Check the spelling of a single word.
+ *
+ * Returns false when no dictionary is loaded, when no codec is set, when
+ * the word is empty or too long for the internal buffer, or when the
+ * conversion to the internal representation fails.  Otherwise returns
+ * true if good() or compoundgood() accepts the word.
+ */
+bool
+ISpellChecker::checkWord( const QString& utf8Word )
+{
+	ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
+
+	if (!m_bSuccessfulInit || !m_translate_in)
+		return false;
+
+	if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN))
+		return false;
+
+	/* convert to 8bit string and null terminate */
+	int len_out = utf8Word.length();
+	QCString encoded = m_translate_in->fromUnicode( utf8Word, len_out );
+
+	/* a non-zero result means the word could not be converted */
+	if (strtoichar(iWord, encoded.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
+		return false;
+
+	return good(iWord, 0, 0, 1, 0) == 1 || compoundgood(iWord, 1) == 1;
+}
+
+/*
+ * Produce spelling suggestions for a word.
+ *
+ * Returns an empty list when no dictionary is loaded, when no codec is
+ * set, when the word is empty or too long for the internal buffer, or
+ * when the conversion to the internal representation fails.  Otherwise
+ * the candidates generated by makepossibilities() are converted back to
+ * Unicode with the same codec and returned in generation order.
+ */
+QStringList
+ISpellChecker::suggestWord(const QString& utf8Word)
+{
+	ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
+
+	if (!m_bSuccessfulInit)
+		return QStringList();
+
+	if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN))
+		return QStringList();
+
+	if (!m_translate_in)
+		return QStringList();
+
+	/* convert to 8bit string and null terminate */
+	int len_out = utf8Word.length();
+	QCString out = m_translate_in->fromUnicode( utf8Word, len_out );
+
+	/* a non-zero result means the word could not be converted */
+	if (strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
+		return QStringList();
+
+	makepossibilities(iWord);
+
+	/* the codec was checked above, so every candidate goes through it */
+	QStringList sugg_arr;
+	for (int c = 0; c < m_pcount; c++)
+		sugg_arr.append( m_translate_in->toUnicode( m_possibilities[c] ) );
+
+	return sugg_arr;
+}
+
+/*
+ * Fill "names" with one candidate path per entry of ispell_dirs, formed
+ * as "<directory>/<dict>".  Any previous content of "names" is discarded.
+ */
+static void
+s_buildHashNames (std::vector<std::string> & names, const char * dict)
+{
+	names.clear ();
+
+	for (const char * const * dirName = ispell_dirs; *dirName; ++dirName) {
+		QCString candidate( *dirName );
+		candidate += '/';
+		candidate += dict;
+		names.push_back( candidate.data() );
+	}
+}
+
+/*
+ * Scan every directory in ispell_dirs for "*.hash" files and record, for
+ * each language in ispell_map whose dictionary file name matches, the
+ * language tag together with the file entry in ispell_dict_map.
+ */
+static void
+s_allDics()
+{
+	for (const char * const * dirName = ispell_dirs; *dirName; ++dirName) {
+		QDir dir( *dirName );
+		QStringList hashFiles = dir.entryList( "*.hash" );
+		for ( QStringList::Iterator it = hashFiles.begin(); it != hashFiles.end(); ++it ) {
+			const QString fileName = QFileInfo( *it ).fileName();
+			for (size_t k = 0; k < size_ispell_map; k++)
+			{
+				const IspellMap & mapping = ispell_map[k];
+				if (strcmp (fileName.latin1(), mapping.dict) == 0)
+					ispell_dict_map.insert( mapping.lang, *it );
+			}
+		}
+	}
+}
+
+/*
+ * Return the language tags for which a hash file was found.  The
+ * directory scan runs whenever the cached map is still empty.
+ */
+QValueList<QString>
+ISpellChecker::allDics()
+{
+	const bool needScan = ispell_dict_map.empty();
+	if ( needScan )
+	{
+		s_allDics();
+	}
+
+	return ispell_dict_map.keys();
+}
+
+/*
+ * Try to load the hash file "szdict" from each known ispell directory.
+ *
+ * Returns the path of the first candidate for which linit() reports
+ * success (a non-negative result), or a null QString if none could be
+ * loaded.
+ */
+QString
+ISpellChecker::loadDictionary (const char * szdict)
+{
+	std::vector<std::string> candidates;
+	s_buildHashNames (candidates, szdict);
+
+	for (std::vector<std::string>::const_iterator path = candidates.begin();
+	     path != candidates.end(); ++path)
+	{
+		if (linit(const_cast<char*>(path->c_str())) < 0)
+			continue;
+
+		return path->c_str();
+	}
+
+	return QString::null;
+}
+
+/*!
+ * Load the ispell dictionary hash file for the given language.
+ *
+ * \param szLang - The language tag, spelled as in ispell_map (e.g. "en_US")
+ * \return true if the language is known and its hash file was loaded,
+ *         false otherwise
+ */
+bool
+ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
+{
+	/* find the first table row for this language tag */
+	const IspellMap * found = NULL;
+	for (size_t i = 0; i < size_ispell_map && !found; i++)
+	{
+		if (strcmp (szLang, ispell_map[i].lang) == 0)
+			found = &ispell_map[i];
+	}
+
+	if (!found || !found->dict || found->dict[0] == '\0')
+		return false;
+
+	alloc_ispell_struct();
+
+	const QString hashname = loadDictionary(found->dict);
+	if (hashname.isEmpty())
+		return false;
+
+	/* the hash file is loaded; now pick the matching codec */
+	setDictionaryEncoding (hashname, found->enc);
+
+	return true;
+}
+
+/*
+ * Select the codec (m_translate_in) and the preferred string character
+ * type (prefstringchar) for the dictionary that was just loaded.
+ *
+ * \param hashname - path of the loaded hash file (currently unused)
+ * \param encoding - encoding name from the language table; may be null
+ *
+ * If "encoding" names a known codec it is used, and prefstringchar is
+ * looked up under "utf8" and then "latin1".."latin15".  Otherwise the
+ * codec itself is derived from the first of those names that
+ * findfiletype() accepts, falling back to latin1.
+ */
+void
+ISpellChecker::setDictionaryEncoding( const QString& hashname, const char * encoding )
+{
+	/* Use the encoding supplied by the caller, if it names a known codec. */
+	try_autodetect_charset(encoding);
+
+	if (m_translate_in)
+	{
+		/* We still have to setup prefstringchar */
+		prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
+				: static_cast<int *>(NULL));
+
+		if (prefstringchar < 0)
+		{
+			for (int n1 = 1; n1 <= 15; n1++)
+			{
+				/* Build "latinN" properly; adding an int to a string
+				 * literal only offsets the pointer. */
+				const QString teststring = QString("latin%1").arg(n1);
+				prefstringchar = findfiletype(teststring.latin1(), 1,
+						deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
+				if (prefstringchar >= 0)
+					break;
+			}
+		}
+
+		return; /* success */
+	}
+
+	/* Test for UTF-8 first */
+	prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
+	if (prefstringchar >= 0)
+	{
+		m_translate_in = QTextCodec::codecForName("utf8");
+	}
+
+	if (m_translate_in)
+		return; /* success */
+
+	/* Look for "altstringtype" names from latin1 to latin15 */
+	for (int n1 = 1; n1 <= 15; n1++)
+	{
+		const QString teststring = QString("latin%1").arg(n1);
+		prefstringchar = findfiletype(teststring.latin1(), 1,
+				deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
+		if (prefstringchar >= 0)
+		{
+			//FIXME: latin1 might be wrong
+			m_translate_in = QTextCodec::codecForName( teststring.latin1() );
+			break;
+		}
+	}
+
+	/* If nothing found, use latin1 */
+	if (!m_translate_in)
+	{
+		m_translate_in = QTextCodec::codecForName("latin1");
+	}
+}
+
+/*
+ * Load the dictionary for a language tag, retrying with the part before
+ * the last underscore (en_US => en) when the full tag cannot be loaded.
+ *
+ * On success the checker is marked as initialized and m_defdupchar is
+ * set from prefstringchar (0 when prefstringchar is negative).
+ */
+bool
+ISpellChecker::requestDictionary(const char *szLang)
+{
+	if (!loadDictionaryForLanguage (szLang))
+	{
+		// handle a shortened version of the language tag: en_US => en
+		const std::string full_tag (szLang);
+		const std::string::size_type cut = full_tag.rfind ('_');
+
+		if (cut == std::string::npos)
+			return false;
+
+		const std::string short_tag = full_tag.substr(0, cut);
+		if (!loadDictionaryForLanguage (short_tag.c_str()))
+			return false;
+	}
+
+	m_bSuccessfulInit = true;
+	m_defdupchar = (prefstringchar < 0) ? 0 : prefstringchar;
+
+	return true;
+}
diff --git a/kspell2/plugins/ispell/ispell_checker.h b/kspell2/plugins/ispell/ispell_checker.h
new file mode 100644
index 000000000..e59f8ed2d
--- /dev/null
+++ b/kspell2/plugins/ispell/ispell_checker.h
@@ -0,0 +1,273 @@
+/* vim: set sw=8: -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* kspell2 - adopted from Enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ * Copyright (C) 2004 Zack Rusin <zack@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+#ifndef ISPELL_CHECKER_H
+#define ISPELL_CHECKER_H
+
+#include "ispell.h"
+
+#include <qstringlist.h>
+#include <qvaluelist.h>
+#include <qtextcodec.h>
+#include <qstring.h>
+
+
+/*
+ * Spell checker wrapping the classic ispell sources.
+ *
+ * The public section is the small API used by the KSpell2 plugin
+ * (ISpellDict / ISpellClient).  Everything in the private section was,
+ * according to the comments below, a global function or global variable
+ * in the original ispell code and is now per-instance.
+ *
+ * Copying is disabled: the copy constructor and assignment operator are
+ * declared private and intentionally left without an implementation.
+ */
+class ISpellChecker
+{
+public:
+ ISpellChecker();
+ ~ISpellChecker();
+
+ /* NOTE(review): presumably returns true when `word` is spelled correctly
+  * and a list of replacement candidates respectively - confirm against
+  * ispell_checker.cpp. */
+ bool checkWord(const QString& word);
+ QStringList suggestWord(const QString& word);
+
+ /* Loads the dictionary for the language tag szLang, retrying with the
+  * part before the last '_' (en_US => en) when the full tag fails.
+  * Returns true on success. */
+ bool requestDictionary (const char * szLang);
+ /* List of available dictionaries; ISpellClient::languages() returns it
+  * as the plugin's language list. */
+ static QValueList<QString> allDics();
+private:
+ ISpellChecker(const ISpellChecker&); // no impl (non-copyable)
+ void operator=(const ISpellChecker&); // no impl (non-assignable)
+
+ QString loadDictionary (const char * szLang );
+ bool loadDictionaryForLanguage ( const char * szLang );
+ void setDictionaryEncoding ( const QString& hashname, const char * enc );
+
+ //
+ // The member functions after this point were formerly global functions
+ // passed a context structure pointer...
+ //
+
+ /* Probes the loaded hash file for a "utf8" or "latin1".."latin15" string
+  * type via findfiletype() and selects m_translate_in accordingly,
+  * falling back to latin1 when nothing matches. */
+ void try_autodetect_charset(const char * inEncoding);
+
+ //
+ // From ispell correct.c
+ //
+ // NOTE(review): P(( ... )) is presumably the K&R/ANSI prototype wrapper
+ // macro supplied by ispell.h - confirm there.
+ //
+
+ int casecmp P ((char * a, char * b, int canonical));
+ void makepossibilities P ((ichar_t * word));
+ int insert P ((ichar_t * word));
+#ifndef NO_CAPITALIZATION_SUPPORT
+ void wrongcapital P ((ichar_t * word));
+#endif /* NO_CAPITALIZATION_SUPPORT */
+ void wrongletter P ((ichar_t * word));
+ void extraletter P ((ichar_t * word));
+ void missingletter P ((ichar_t * word));
+ void missingspace P ((ichar_t * word));
+ int compoundgood P ((ichar_t * word, int pfxopts));
+ void transposedletter P ((ichar_t * word));
+ int ins_cap P ((ichar_t * word, ichar_t * pattern));
+ int save_cap P ((ichar_t * word, ichar_t * pattern,
+ ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]));
+ int ins_root_cap P ((ichar_t * word, ichar_t * pattern,
+ int prestrip, int preadd, int sufstrip, int sufadd,
+ struct dent * firstdent, struct flagent * pfxent,
+ struct flagent * sufent));
+ void save_root_cap P ((ichar_t * word, ichar_t * pattern,
+ int prestrip, int preadd, int sufstrip, int sufadd,
+ struct dent * firstdent, struct flagent * pfxent,
+ struct flagent * sufent,
+ ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN],
+ int * nsaved));
+
+ //
+ // From ispell good.c
+ //
+
+ int good (ichar_t* w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts);
+ void chk_aff (ichar_t* word, ichar_t* ucword, int len, int ignoreflagbits, int allhits, int pfxopts, int sfxopts);
+ /* Reads the hash file (dictionary) named by the argument into this
+  * object; returns -1 when the file cannot be opened or is malformed. */
+ int linit(char*);
+ struct dent * ispell_lookup (ichar_t* s, int dotree);
+ int strtoichar (ichar_t* out, char* in, int outlen, int canonical);
+ int ichartostr (char* out, ichar_t* in, int outlen, int canonical);
+ char * ichartosstr (ichar_t* in, int canonical);
+ /* Callers treat a negative result as "no such string type". */
+ int findfiletype (const char * name, int searchnames, int * deformatter);
+ long whatcap (ichar_t* word);
+
+ /*
+ HACK: macros replaced with function implementations
+ so we could do a side-effect-free check for unicode
+ characters which aren't in hashheader
+ */
+ char myupper(ichar_t c);
+ char mylower(ichar_t c);
+ int myspace(ichar_t c);
+ char iswordch(ichar_t c);
+ char isboundarych(ichar_t c);
+ char isstringstart(ichar_t c);
+ ichar_t mytolower(ichar_t c);
+ ichar_t mytoupper(ichar_t c);
+
+#ifndef ICHAR_IS_CHAR
+ int cap_ok (ichar_t* word, struct success* hit, int len);
+
+ int hash (ichar_t* s, int hashtblsize);
+#endif
+
+ //
+ // From ispell lookup.c
+ //
+
+ void clearindex P ((struct flagptr * indexp));
+ void initckch P ((char *));
+
+ void alloc_ispell_struct();
+ void free_ispell_struct();
+
+ //
+ // From ispell makedent.c
+ //
+
+ int addvheader P ((struct dent * ent));
+ void upcase P ((ichar_t * string));
+ void lowcase P ((ichar_t * string));
+ void chupcase P ((char * s));
+
+ int stringcharlen P ((char * bufp, int canonical));
+ ichar_t * strtosichar P ((char * in, int canonical));
+ char * printichar P ((int in));
+
+ //
+ // From ispell tgood.c
+ //
+
+ void pfx_list_chk P ((ichar_t * word, ichar_t * ucword,
+ int len, int optflags, int sfxopts, struct flagptr * ind,
+ int ignoreflagbits, int allhits));
+ void chk_suf P ((ichar_t * word, ichar_t * ucword, int len,
+ int optflags, struct flagent * pfxent, int ignoreflagbits,
+ int allhits));
+ void suf_list_chk P ((ichar_t * word, ichar_t * ucword, int len,
+ struct flagptr * ind, int optflags, struct flagent * pfxent,
+ int ignoreflagbits, int allhits));
+ int expand_pre P ((char * croot, ichar_t * rootword,
+ MASKTYPE mask[], int option, char * extra));
+ int pr_pre_expansion P ((char * croot, ichar_t * rootword,
+ struct flagent * flent, MASKTYPE mask[], int option,
+ char * extra));
+ int expand_suf P ((char * croot, ichar_t * rootword,
+ MASKTYPE mask[], int optflags, int option, char * extra));
+ int pr_suf_expansion P ((char * croot, ichar_t * rootword,
+ struct flagent * flent, int option, char * extra));
+ void forcelc P ((ichar_t * dst, int len));
+
+ /* this is used for converting from unsigned short to UCS-4 */
+ /* NOTE(review): the comment above appears to be left over; no member
+  * declared after it performs such a conversion - confirm and remove. */
+
+ int deftflag; /* NZ for TeX mode by default */
+ int prefstringchar; /* Preferred string character type */
+ bool m_bSuccessfulInit; /* Set to true by requestDictionary() on success */
+
+ //
+ // The members after this point were formerly global variables
+ // in the original ispell code
+ //
+
+ char * m_BC; /* backspace if not ^H */
+ char * m_cd; /* clear to end of display */
+ char * m_cl; /* clear display */
+ char * m_cm; /* cursor movement */
+ char * m_ho; /* home */
+ char * m_nd; /* non-destructive space */
+ char * m_so; /* standout */
+ char * m_se; /* standout end */
+ int m_sg; /* space taken by so/se */
+ char * m_ti; /* terminal initialization sequence */
+ char * m_te; /* terminal termination sequence */
+ int m_li; /* lines */
+ int m_co; /* columns */
+
+ char m_ctoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Current token as char */
+ ichar_t m_itoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Ctoken as ichar_t str */
+
+ int m_numhits; /* number of hits in dictionary lookups */
+ struct success
+ m_hits[MAX_HITS]; /* table of hits gotten in lookup */
+
+ char * m_hashstrings; /* Strings in hash table */
+ struct hashheader
+ m_hashheader; /* Header of hash table */
+ struct dent *
+ m_hashtbl; /* Main hash table, for dictionary */
+ int m_hashsize; /* Size of main hash table */
+
+ char m_hashname[MAXPATHLEN]; /* Name of hash table file */
+
+ int m_aflag; /* NZ if -a or -A option specified */
+ int m_cflag; /* NZ if -c (crunch) option */
+ int m_lflag; /* NZ if -l (list) option */
+ int m_incfileflag; /* whether xgets() acts exactly like gets() */
+ int m_nodictflag; /* NZ if dictionary not needed */
+
+ int m_uerasechar; /* User's erase character, from stty */
+ int m_ukillchar; /* User's kill character */
+
+ unsigned int m_laststringch; /* Number of last string character */
+ int m_defdupchar; /* Default duplicate string type */
+
+ int m_numpflags; /* Number of prefix flags in table */
+ int m_numsflags; /* Number of suffix flags in table */
+ struct flagptr m_pflagindex[SET_SIZE + MAXSTRINGCHARS];
+ /* Fast index to pflaglist */
+ struct flagent * m_pflaglist; /* Prefix flag control list */
+ struct flagptr m_sflagindex[SET_SIZE + MAXSTRINGCHARS];
+ /* Fast index to sflaglist */
+ struct flagent * m_sflaglist; /* Suffix flag control list */
+
+ struct strchartype * /* String character type collection */
+ m_chartypes;
+
+ FILE * m_infile; /* File being corrected */
+ FILE * m_outfile; /* Corrected copy of infile */
+
+ char * m_askfilename; /* File specified in -f option */
+
+ int m_changes; /* NZ if changes made to cur. file */
+ int m_readonly; /* NZ if current file is readonly */
+ int m_quit; /* NZ if we're done with this file */
+
+/* Being a preprocessor macro, MAXPOSSIBLE is visible to everything that
+ * includes this header, not just to this class. */
+#define MAXPOSSIBLE 100 /* Max no. of possibilities to generate */
+
+ char m_possibilities[MAXPOSSIBLE][INPUTWORDLEN + MAXAFFIXLEN];
+ /* Table of possible corrections */
+ int m_pcount; /* Count of possibilities generated */
+ int m_maxposslen; /* Length of longest possibility */
+ int m_easypossibilities; /* Number of "easy" corrections found */
+ /* ..(defined as those using legal affixes) */
+
+ /*
+ * The following array contains a list of characters that should be tried
+ * in "missingletter." Note that lowercase characters are omitted.
+ */
+ int m_Trynum; /* Size of "Try" array */
+ ichar_t m_Try[SET_SIZE + MAXSTRINGCHARS];
+
+ QTextCodec *m_translate_in; /* Selected translation from/to Unicode */
+};
+
+#endif /* ISPELL_CHECKER_H */
diff --git a/kspell2/plugins/ispell/ispell_def.h b/kspell2/plugins/ispell/ispell_def.h
new file mode 100644
index 000000000..b3d149c43
--- /dev/null
+++ b/kspell2/plugins/ispell/ispell_def.h
@@ -0,0 +1,34 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+/*
+ * Largest word accepted from a file by any input routine, plus one.
+ * The value is only supplied here when INPUTWORDLEN has not already been
+ * defined, so an earlier definition in the translation unit takes
+ * precedence.
+ */
+#ifndef INPUTWORDLEN
+#define INPUTWORDLEN 100
+#endif
diff --git a/kspell2/plugins/ispell/kspell_ispell.desktop b/kspell2/plugins/ispell/kspell_ispell.desktop
new file mode 100644
index 000000000..8e33dc428
--- /dev/null
+++ b/kspell2/plugins/ispell/kspell_ispell.desktop
@@ -0,0 +1,22 @@
+[Desktop Entry]
+Type=Service
+ServiceTypes=KSpell/Client
+X-KDE-Library=kspell_ispell
+X-KDE-PluginInfo-Author=Zack Rusin
+X-KDE-PluginInfo-Email=zack@kde.org
+X-KDE-PluginInfo-Name=kspell_ispell
+X-KDE-PluginInfo-Version=0.0.1
+X-KDE-PluginInfo-Website=http://www.kde.org
+X-KDE-PluginInfo-Category=Clients
+X-KDE-PluginInfo-Depends=
+X-KDE-PluginInfo-License=LGPL
+X-KDE-PluginInfo-EnabledByDefault=true
+Name=ISpell
+Name[bn]=আই-স্পেল
+Name[hi]=आई-स्पैल
+Name[it]=Ispell
+Name[ne]=आई स्पेल
+Name[sv]=Ispell
+Name[ta]=ISpell
+Name[te]=ఐస్పెల్
+Name[tg]=ISpell
diff --git a/kspell2/plugins/ispell/kspell_ispellclient.cpp b/kspell2/plugins/ispell/kspell_ispellclient.cpp
new file mode 100644
index 000000000..5830d4957
--- /dev/null
+++ b/kspell2/plugins/ispell/kspell_ispellclient.cpp
@@ -0,0 +1,54 @@
+/*
+ * kspell_ispellclient.cpp
+ *
+ * Copyright (C) 2003 Zack Rusin <zack@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#include "kspell_ispellclient.h"
+
+#include "kspell_ispelldict.h"
+#include "ispell_checker.h"
+
+#include <kgenericfactory.h>
+#include <kdebug.h>
+
+/*
+ * Plugin entry point: exports a KGenericFactory that creates ISpellClient
+ * objects.  The library name "kspell_ispell" is the same one listed as
+ * X-KDE-Library in kspell_ispell.desktop.
+ */
+typedef KGenericFactory<ISpellClient> ISpellClientFactory;
+K_EXPORT_COMPONENT_FACTORY( kspell_ispell, ISpellClientFactory( "kspell_ispell" ) )
+
+using namespace KSpell2;
+
+/**
+ * Constructs the client.  @p parent and @p name are forwarded unchanged to
+ * the KSpell2::Client base class; the argument list handed over by the
+ * factory is not used.
+ */
+ISpellClient::ISpellClient( QObject *parent, const char *name, const QStringList& /* args */ )
+ : Client( parent, name )
+{
+}
+
+/**
+ * Destructor.  The client holds no members of its own, so there is nothing
+ * to release here.
+ */
+ISpellClient::~ISpellClient()
+{
+}
+
+/**
+ * Creates a new ispell-backed dictionary for @p language.
+ *
+ * The object is heap-allocated and handed back as a raw pointer.
+ * NOTE(review): the caller presumably takes ownership - confirm against
+ * the KSpell2::Client contract.
+ */
+Dictionary* ISpellClient::dictionary( const QString& language )
+{
+    return new ISpellDict( language );
+}
+
+/**
+ * Returns the languages this client can serve, which is whatever
+ * ISpellChecker::allDics() reports.
+ */
+QStringList ISpellClient::languages() const
+{
+ return ISpellChecker::allDics();
+}
+
+#include "kspell_ispellclient.moc"
diff --git a/kspell2/plugins/ispell/kspell_ispellclient.h b/kspell2/plugins/ispell/kspell_ispellclient.h
new file mode 100644
index 000000000..b46f485fd
--- /dev/null
+++ b/kspell2/plugins/ispell/kspell_ispellclient.h
@@ -0,0 +1,56 @@
+/*
+ * kspell_ispellclient.h
+ *
+ * Copyright (C) 2003 Zack Rusin <zack@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#ifndef KSPELL_ISPELLCLIENT_H
+#define KSPELL_ISPELLCLIENT_H
+
+#include "client.h"
+#include <qobject.h>
+
+#include "ispell_checker.h"
+
+namespace KSpell2 {
+ class Dictionary;
+}
+using KSpell2::Dictionary;
+
+/**
+ * KSpell2 client plugin that provides ispell-backed dictionaries.
+ *
+ * The class adds no state of its own; it only implements the virtual
+ * interface declared by KSpell2::Client.
+ */
+class ISpellClient : public KSpell2::Client
+{
+ Q_OBJECT
+public:
+ /** @p parent and @p name go to KSpell2::Client; the args list is unused. */
+ ISpellClient( QObject *parent, const char *name, const QStringList & /* args */ );
+ ~ISpellClient();
+
+ /**
+  * Fixed reliability score of this backend (always 10).
+  * NOTE(review): presumably used to rank competing clients - confirm in
+  * client.h.
+  */
+ virtual int reliability() const {
+ return 10;
+ }
+
+ /** Returns a newly allocated dictionary for @p language. */
+ virtual Dictionary* dictionary( const QString& language );
+
+ /** Returns the list of languages this client offers. */
+ virtual QStringList languages() const;
+
+ /** Name of this client: always "ISpell". */
+ virtual QString name() const {
+ return "ISpell";
+ }
+private:
+
+};
+
+#endif
diff --git a/kspell2/plugins/ispell/kspell_ispelldict.cpp b/kspell2/plugins/ispell/kspell_ispelldict.cpp
new file mode 100644
index 000000000..2d3728a6a
--- /dev/null
+++ b/kspell2/plugins/ispell/kspell_ispelldict.cpp
@@ -0,0 +1,76 @@
+/**
+ * kspell_ispelldict.cpp
+ *
+ * Copyright (C) 2003 Zack Rusin <zack@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#include "kspell_ispelldict.h"
+
+#include <kdebug.h>
+
+#include "ispell_checker.h"
+
+using namespace KSpell2;
+
+/**
+ * Creates a dictionary for @p lang backed by a fresh ISpellChecker.
+ *
+ * The checker is asked to load the dictionary for the Latin-1 form of the
+ * language tag.  A failure is only logged; the object is still constructed.
+ */
+ISpellDict::ISpellDict( const QString& lang )
+    : Dictionary( lang ),
+      m_checker( new ISpellChecker() )
+{
+    const bool loaded = m_checker->requestDictionary( lang.latin1() );
+    if ( loaded )
+        return;
+
+    kdError()<<"Language \""<< lang << "\" doesn't exist for Ispell"<<endl;
+}
+
+/**
+ * Releases the ISpellChecker that the constructor allocated with new.
+ * Without this the checker (and the hash tables it loads) would be leaked
+ * every time a dictionary is destroyed.
+ */
+ISpellDict::~ISpellDict()
+{
+    delete m_checker;
+    m_checker = 0;
+}
+
+/**
+ * Checks @p word by forwarding it to ISpellChecker::checkWord() and
+ * returns that call's result unchanged.
+ */
+bool ISpellDict::check( const QString& word )
+{
+ return m_checker->checkWord( word );
+}
+
+/**
+ * Returns the list produced by ISpellChecker::suggestWord() for @p word.
+ */
+QStringList ISpellDict::suggest( const QString& word )
+{
+ return m_checker->suggestWord( word );
+}
+
+/**
+ * Checks @p word and, when the check fails, fills @p suggestions with the
+ * candidates returned by suggest().
+ *
+ * @param word        word to check
+ * @param suggestions receives the suggestions for a word that failed the
+ *                    check; left untouched when the word passes
+ * @return the result of check( word )
+ */
+bool ISpellDict::checkAndSuggest( const QString& word,
+                                  QStringList& suggestions )
+{
+    const bool correct = check( word );
+    // Replacement candidates are only wanted for a word that did NOT pass
+    // the check; asking for them on a correct word was the wrong way round.
+    if ( !correct )
+        suggestions = suggest( word );
+    return correct;
+}
+
+/**
+ * Not implemented for this backend: both arguments are ignored and the
+ * function always returns false.
+ */
+bool ISpellDict::storeReplacement( const QString& ,
+ const QString& )
+{
+ return false;
+}
+
+/**
+ * Not implemented for this backend: the word is ignored and the function
+ * always returns false.
+ */
+bool ISpellDict::addToPersonal( const QString& )
+{
+ return false;
+}
+
+/**
+ * Not implemented for this backend: the word is ignored and the function
+ * always returns false.
+ */
+bool ISpellDict::addToSession( const QString& )
+{
+ return false;
+}
diff --git a/kspell2/plugins/ispell/kspell_ispelldict.h b/kspell2/plugins/ispell/kspell_ispelldict.h
new file mode 100644
index 000000000..45154d790
--- /dev/null
+++ b/kspell2/plugins/ispell/kspell_ispelldict.h
@@ -0,0 +1,49 @@
+/**
+ * kspell_ispelldict.h
+ *
+ * Copyright (C) 2003 Zack Rusin <zack@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#ifndef KSPELL_ISPELLDICT_H
+#define KSPELL_ISPELLDICT_H
+
+#include "dictionary.h"
+
+class ISpellChecker;
+
+/**
+ * KSpell2 dictionary that delegates spell checking to an ISpellChecker.
+ *
+ * ISpellChecker is only forward-declared here, so code that includes this
+ * header does not need the ispell headers.
+ */
+class ISpellDict : public KSpell2::Dictionary
+{
+public:
+    /** Creates the dictionary for the language tag @p lang. */
+    ISpellDict( const QString& lang );
+    ~ISpellDict();
+
+    /** Checks @p word with the underlying checker. */
+    virtual bool check( const QString& word );
+
+    /** Returns the underlying checker's suggestions for @p word. */
+    virtual QStringList suggest( const QString& word );
+
+    /**
+     * Combined check()/suggest() call.  Returns the result of check();
+     * @p suggestions may be filled as a side effect.
+     */
+    virtual bool checkAndSuggest( const QString& word,
+                                  QStringList& suggestions ) ;
+
+    /** Not supported by this backend; always returns false. */
+    virtual bool storeReplacement( const QString& bad,
+                                   const QString& good );
+
+    /** Not supported by this backend; always returns false. */
+    virtual bool addToPersonal( const QString& word );
+    /** Not supported by this backend; always returns false. */
+    virtual bool addToSession( const QString& word );
+private:
+    ISpellChecker *m_checker;   ///< checker instance allocated by the constructor
+};
+
+#endif // KSPELL_ISPELLDICT_H
diff --git a/kspell2/plugins/ispell/lookup.cpp b/kspell2/plugins/ispell/lookup.cpp
new file mode 100644
index 000000000..6030f49d0
--- /dev/null
+++ b/kspell2/plugins/ispell/lookup.cpp
@@ -0,0 +1,764 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* kspell2 - adopted from enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ * Copyright (C) 2004 Zack Rusin <zack@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+/*
+ * lookup.c - see if a word appears in the dictionary
+ *
+ * Pace Willisson, 1983
+ *
+ * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ * 4. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgment:
+ * This product includes software developed by Geoff Kuenning and
+ * other unpaid contributors.
+ * 5. The name of Geoff Kuenning may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Log$
+ * Revision 1.1 2004/01/31 16:44:12 zrusin
+ * ISpell plugin.
+ *
+ * Revision 1.7 2003/09/25 02:44:48 dom
+ * bug 5813
+ *
+ * Revision 1.6 2003/08/26 13:20:40 dom
+ * ispell crasher fix, implement enchant_dictionary_release
+ *
+ * Revision 1.5 2003/08/26 13:08:03 uwog
+ * Fix segfault when the requested dictionary couldn't be found.
+ *
+ * Revision 1.4 2003/08/14 16:27:36 dom
+ * update some documentation
+ *
+ * Revision 1.3 2003/07/28 20:40:27 dom
+ * fix up the license clause, further win32-registry proof some directory getting functions
+ *
+ * Revision 1.2 2003/07/16 22:52:47 dom
+ * LGPL + exception license
+ *
+ * Revision 1.1 2003/07/15 01:15:07 dom
+ * ispell enchant backend
+ *
+ * Revision 1.3 2003/01/29 05:50:12 hippietrail
+ *
+ * Fixed my mess in EncodingManager.
+ * Changed many C casts to C++ casts.
+ *
+ * Revision 1.2 2003/01/25 03:16:05 hippietrail
+ *
+ * An UT_ICONV_INVALID fix which escaped the last commit.
+ *
+ * Revision 1.1 2003/01/24 05:52:34 hippietrail
+ *
+ * Refactored ispell code. Old ispell global variables had been put into
+ * an allocated structure, a pointer to which was passed to many functions.
+ * I have now made all such functions and variables private members of the
+ * ISpellChecker class. It was C OO, now it's C++ OO.
+ *
+ * I've fixed the makefiles and tested compilation but am unable to test
+ * operation. Please back out my changes if they cause problems which
+ * are not obvious or easy to fix.
+ *
+ * Revision 1.12 2003/01/06 18:48:39 dom
+ * ispell cleanup, start of using new 'add' save features
+ *
+ * Revision 1.11 2002/09/19 05:31:17 hippietrail
+ *
+ * More Ispell cleanup. Conditional globals and DEREF macros are removed.
+ * K&R function declarations removed, converted to Doxygen style comments
+ * where possible. No code has been changed (I hope). Compiles for me but
+ * unable to test.
+ *
+ * Revision 1.10 2002/09/17 03:03:30 hippietrail
+ *
+ * After seeking permission on the developer list I've reformatted all the
+ * spelling source which seemed to have parts which used 2, 3, 4, and 8
+ * spaces for tabs. It should all look good with our standard 4-space
+ * tabs now.
+ * I've concentrated just on indentation in the actual code. More prettying
+ * could be done.
+ * * NO code changes were made *
+ *
+ * Revision 1.9 2002/09/13 17:20:13 mpritchett
+ * Fix more warnings for Linux build
+ *
+ * Revision 1.8 2002/05/03 09:49:43 fjfranklin
+ * o hash downloader update (Gabriel Gerhardsson)
+ * - Comment out the "Can't open <dictionary>" printf.
+ * - Make the progressbar more clean at the begining of the download.
+ * - Add support for tarballs that doesn't have the full path included
+ * - Fix copyright headers on the newly added files (*HashDownloader.*)
+ *
+ * Revision 1.7 2001/08/27 19:06:30 dom
+ * Lots of compilation fixes
+ *
+ * Revision 1.6 2001/08/10 18:32:40 dom
+ * Spelling and iconv updates. god, i hate iconv
+ *
+ * Revision 1.5 2001/08/10 09:57:49 hub
+ * Patch by sobomax@FreeBSD.org
+ * #include "iconv.h" directive is missed from src/other/spell/xp/lookup.c and
+ * src/wp/impexp/xp/ie_imp_RTF.cpp.
+ * See bug 1823
+ *
+ * Revision 1.4 2001/07/18 17:46:01 dom
+ * Module changes, and fix compiler warnings
+ *
+ * Revision 1.3 2001/06/12 21:32:49 dom
+ * More ispell work...
+ *
+ * Revision 1.2 2001/05/12 16:05:42 thomasf
+ * Big pseudo changes to ispell to make it pass around a structure rather
+ * than rely on all sorts of gloabals willy nilly here and there. Also
+ * fixed our spelling class to work with accepting suggestions once more.
+ * This code is dirty, gross and ugly (not to mention still not supporting
+ * multiple hash sized just yet) but it works on my machine and will no
+ * doubt break other machines.
+ *
+ * Revision 1.1 2001/04/15 16:01:24 tomas_f
+ * moving to spell/xp
+ *
+ * Revision 1.7 1999/09/29 23:33:32 justin
+ * Updates to the underlying ispell-based code to support suggested corrections.
+ *
+ * Revision 1.6 1999/04/13 17:12:51 jeff
+ * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
+ * Fixed crash on Win32 with the new code.
+ *
+ * Revision 1.5 1999/01/07 01:07:48 paul
+ * Fixed spell leaks.
+ *
+ * Revision 1.5 1999/01/07 01:07:48 paul
+ * Fixed spell leaks.
+ *
+ * Revision 1.4 1998/12/29 14:55:33 eric
+ *
+ * I've doctored the ispell code pretty extensively here. It is now
+ * warning-free on Win32. It also *works* on Win32 now, since I
+ * replaced all the I/O calls with ANSI standard ones.
+ *
+ * Revision 1.3 1998/12/28 23:11:30 eric
+ *
+ * modified spell code and integration to build on Windows.
+ * This is still a hack.
+ *
+ * Actually, it doesn't yet WORK on Windows. It just builds.
+ * SpellCheckInit is failing for some reason.
+ *
+ * Revision 1.2 1998/12/28 22:16:22 eric
+ *
+ * These changes begin to incorporate the spell checker into AbiWord. Most
+ * of this is a hack.
+ *
+ * 1. added other/spell to the -I list in config/abi_defs
+ * 2. replaced other/spell/Makefile with one which is more like
+ * our build system.
+ * 3. added other/spell to other/Makefile so that the build will now
+ * dive down and build the spell check library.
+ * 4. added the AbiSpell library to the Makefiles in wp/main
+ * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
+ * This call is a HACK and should be replaced with something
+ * proper later.
+ * 6. added code to fv_View.cpp as follows:
+ * whenever you double-click on a word, the spell checker
+ * verifies that word and prints its status to stdout.
+ *
+ * Caveats:
+ * 1. This will break the Windows build. I'm going to work on fixing it
+ * now.
+ * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash.
+ * The dictionary location is currently hard-coded. This will be
+ * fixed as well.
+ *
+ * Anyway, such as it is, it works.
+ *
+ * Revision 1.1 1998/12/28 18:04:43 davet
+ * Spell checker code stripped from ispell. At this point, there are
+ * two external routines... the Init routine, and a check-a-word routine
+ * which returns a boolean value, and takes a 16 bit char string.
+ * The code resembles the ispell code as much as possible still.
+ *
+ * Revision 1.42 1995/01/08 23:23:42 geoff
+ * Support MSDOS_BINARY_OPEN when opening the hash file to read it in.
+ *
+ * Revision 1.41 1994/01/25 07:11:51 geoff
+ * Get rid of all old RCS log lines in preparation for the 3.1 release.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "ispell_checker.h"
+#include "msgs.h"
+
+#ifdef INDEXDUMP
+static void dumpindex P ((struct flagptr * indexp, int depth));
+#endif /* INDEXDUMP */
+
+/*
+ * Global with external linkage, initialised to 64.
+ * NOTE(review): presumably the number of flag bits in a MASKTYPE mask -
+ * confirm against ispell.h and the users of this variable.
+ */
+int gnMaskBits = 64;
+
+/*!
+ * Load a compiled ispell hash file (dictionary) into this checker.
+ *
+ * The routine reads and validates the file header, allocates and fills the
+ * string table, the hash table and the affix flag table, converts the
+ * offsets stored in the file into real pointers, builds the suffix and
+ * prefix index tables, sets up the string character type table and finally
+ * calls initckch (NULL).
+ *
+ * The hash file is closed on every exit path, including the error returns.
+ *
+ * \param hashname name of the hash file (dictionary)
+ *
+ * \return 0 on success; -1 if the file cannot be opened, is short or
+ *         malformed, or if memory could not be allocated.
+ */
+int ISpellChecker::linit (char *hashname)
+{
+    FILE* fpHash;
+
+    int i;
+    struct dent * dp;
+    struct flagent * entry;
+    struct flagptr * ind;
+    int nextchar, x;
+    int viazero;
+    ichar_t * cp;
+
+    if ((fpHash = fopen (hashname, "rb")) == NULL)
+    {
+        return (-1);
+    }
+
+    /* Read the fixed-size header and make sure it belongs to a compatible hash file. */
+    m_hashsize = fread (reinterpret_cast<char *>(&m_hashheader), 1, sizeof m_hashheader, fpHash);
+    if (m_hashsize < static_cast<int>(sizeof(m_hashheader)))
+    {
+        if (m_hashsize < 0)
+            fprintf (stderr, LOOKUP_C_CANT_READ, hashname);
+        else if (m_hashsize == 0)
+            fprintf (stderr, LOOKUP_C_NULL_HASH, hashname);
+        else
+            fprintf (stderr,
+              LOOKUP_C_SHORT_HASH (m_hashname, m_hashsize,
+                static_cast<int>(sizeof m_hashheader)));
+        fclose (fpHash);
+        return (-1);
+    }
+    else if (m_hashheader.magic != MAGIC)
+    {
+        fprintf (stderr,
+          LOOKUP_C_BAD_MAGIC (hashname, static_cast<unsigned int>(MAGIC),
+            static_cast<unsigned int>(m_hashheader.magic)));
+        fclose (fpHash);
+        return (-1);
+    }
+    else if (m_hashheader.magic2 != MAGIC)
+    {
+        fprintf (stderr,
+          LOOKUP_C_BAD_MAGIC2 (hashname, static_cast<unsigned int>(MAGIC),
+            static_cast<unsigned int>(m_hashheader.magic2)));
+        fclose (fpHash);
+        return (-1);
+    }
+/*  else if (hashheader.compileoptions != COMPILEOPTIONS*/
+    else if ( 1 != 1
+      || m_hashheader.maxstringchars != MAXSTRINGCHARS
+      || m_hashheader.maxstringcharlen != MAXSTRINGCHARLEN)
+    {
+        fprintf (stderr,
+          LOOKUP_C_BAD_OPTIONS (static_cast<unsigned int>(m_hashheader.compileoptions),
+            m_hashheader.maxstringchars, m_hashheader.maxstringcharlen,
+            static_cast<unsigned int>(COMPILEOPTIONS), MAXSTRINGCHARS, MAXSTRINGCHARLEN));
+        fclose (fpHash);
+        return (-1);
+    }
+
+    /* Allocate the tables whose sizes are given by the header. */
+    m_hashtbl =
+        (struct dent *)
+        calloc (static_cast<unsigned>(m_hashheader.tblsize), sizeof (struct dent));
+    m_hashsize = m_hashheader.tblsize;
+    m_hashstrings = static_cast<char *>(malloc(static_cast<unsigned>(m_hashheader.stringsize)));
+    m_numsflags = m_hashheader.stblsize;
+    m_numpflags = m_hashheader.ptblsize;
+    m_sflaglist = (struct flagent *)
+        malloc ((m_numsflags + m_numpflags) * sizeof (struct flagent));
+    if (m_hashtbl == NULL || m_hashstrings == NULL || m_sflaglist == NULL)
+    {
+        fprintf (stderr, LOOKUP_C_NO_HASH_SPACE);
+        fclose (fpHash);
+        return (-1);
+    }
+    /* Suffix and prefix flags share one allocation; the prefixes follow the suffixes. */
+    m_pflaglist = m_sflaglist + m_numsflags;
+
+    /* String table. */
+    if( fread ( m_hashstrings, 1, static_cast<unsigned>(m_hashheader.stringsize), fpHash)
+        != static_cast<size_t>(m_hashheader.stringsize) )
+    {
+        fprintf (stderr, LOOKUP_C_BAD_FORMAT);
+        fprintf (stderr, "stringsize err\n" );
+        fclose (fpHash);
+        return (-1);
+    }
+    /*
+    ** Hash table.  When bit 0x04 of compileoptions is set the table is
+    ** read in one piece; otherwise each entry is read separately and is
+    ** sizeof (MASKTYPE) bytes shorter than struct dent.
+    */
+    if ( m_hashheader.compileoptions & 0x04 )
+    {
+        if( fread (reinterpret_cast<char *>(m_hashtbl), 1, static_cast<unsigned>(m_hashheader.tblsize) * sizeof(struct dent), fpHash)
+            != (static_cast<size_t>(m_hashheader.tblsize * sizeof (struct dent))))
+        {
+            fprintf (stderr, LOOKUP_C_BAD_FORMAT);
+            fclose (fpHash);
+            return (-1);
+        }
+    }
+    else
+    {
+        for( x=0; x<m_hashheader.tblsize; x++ )
+        {
+            if( fread ( reinterpret_cast<char*>(m_hashtbl+x), sizeof( struct dent)-sizeof( MASKTYPE ), 1, fpHash)
+                != 1)
+            {
+                fprintf (stderr, LOOKUP_C_BAD_FORMAT);
+                fclose (fpHash);
+                return (-1);
+            }
+        } /*for*/
+    } /*else*/
+
+    /* Affix flag entries (suffixes followed by prefixes). */
+    if (fread (reinterpret_cast<char *>(m_sflaglist), 1,
+        static_cast<unsigned>(m_numsflags+ m_numpflags) * sizeof (struct flagent), fpHash)
+        != (m_numsflags + m_numpflags) * sizeof (struct flagent))
+    {
+        fprintf (stderr, LOOKUP_C_BAD_FORMAT);
+        fclose (fpHash);
+        return (-1);
+    }
+    fclose (fpHash);
+
+    /*
+    ** The file stores indices instead of pointers, with -1 standing for
+    ** "none".  Convert them into pointers into the tables just loaded.
+    */
+    for (i = m_hashsize, dp = m_hashtbl; --i >= 0; dp++)
+    {
+        if (dp->word == (char *) -1)
+            dp->word = NULL;
+        else
+            dp->word = &m_hashstrings [ reinterpret_cast<size_t>(dp->word) ];
+        if (dp->next == (struct dent *) -1)
+            dp->next = NULL;
+        else
+            dp->next = &m_hashtbl [ reinterpret_cast<size_t>(dp->next) ];
+    }
+
+    /* Same conversion for the strip/affix strings of every flag entry. */
+    for (i = m_numsflags + m_numpflags, entry = m_sflaglist; --i >= 0; entry++)
+    {
+        if (entry->stripl)
+            entry->strip = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->strip)]);
+        else
+            entry->strip = NULL;
+        if (entry->affl)
+            entry->affix = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->affix)]);
+        else
+            entry->affix = NULL;
+    }
+    /*
+    ** Build the suffix index, keyed on the affix characters read from
+    ** the end of the affix backwards.
+    **
+    ** Warning - 'entry' and 'i' are reset in the body of the loop
+    ** below.  Don't try to optimize it by (e.g.) moving the decrement
+    ** of i into the loop condition.
+    */
+    for (i = m_numsflags, entry = m_sflaglist; i > 0; i--, entry++)
+    {
+        if (entry->affl == 0)
+        {
+            cp = NULL;
+            ind = &m_sflagindex[0];
+            viazero = 1;
+        }
+        else
+        {
+            cp = entry->affix + entry->affl - 1;
+            ind = &m_sflagindex[*cp];
+            viazero = 0;
+            while (ind->numents == 0 && ind->pu.fp != NULL)
+            {
+                if (cp == entry->affix)
+                {
+                    ind = &ind->pu.fp[0];
+                    viazero = 1;
+                }
+                else
+                {
+                    ind = &ind->pu.fp[*--cp];
+                    viazero = 0;
+                }
+            }
+        }
+        if (ind->numents == 0)
+            ind->pu.ent = entry;
+        ind->numents++;
+        /*
+        ** If this index entry has more than MAXSEARCH flags in
+        ** it, we will split it into subentries to reduce the
+        ** searching.  However, the split doesn't make sense in
+        ** two cases:  (a) if we are already at the end of the
+        ** current affix, or (b) if all the entries in the list
+        ** have identical affixes.  Since the list is sorted, (b)
+        ** is true if the first and last affixes in the list
+        ** are identical.
+        */
+        if (!viazero && ind->numents >= MAXSEARCH
+          && icharcmp (entry->affix, ind->pu.ent->affix) != 0)
+        {
+            /* Sneaky trick:  back up and reprocess */
+            entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
+            i = m_numsflags - (entry - m_sflaglist);
+            ind->pu.fp =
+                (struct flagptr *)
+                calloc (static_cast<unsigned>(SET_SIZE + m_hashheader.nstrchars),
+                  sizeof (struct flagptr));
+            if (ind->pu.fp == NULL)
+            {
+                fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
+                return (-1);
+            }
+            ind->numents = 0;
+        }
+    }
+    /*
+    ** Build the prefix index, keyed on the affix characters read from
+    ** the start of the affix forwards.
+    **
+    ** Warning - 'entry' and 'i' are reset in the body of the loop
+    ** below.  Don't try to optimize it by (e.g.) moving the decrement
+    ** of i into the loop condition.
+    */
+    for (i = m_numpflags, entry = m_pflaglist; i > 0; i--, entry++)
+    {
+        if (entry->affl == 0)
+        {
+            cp = NULL;
+            ind = &m_pflagindex[0];
+            viazero = 1;
+        }
+        else
+        {
+            cp = entry->affix;
+            ind = &m_pflagindex[*cp++];
+            viazero = 0;
+            while (ind->numents == 0 && ind->pu.fp != NULL)
+            {
+                if (*cp == 0)
+                {
+                    ind = &ind->pu.fp[0];
+                    viazero = 1;
+                }
+                else
+                {
+                    ind = &ind->pu.fp[*cp++];
+                    viazero = 0;
+                }
+            }
+        }
+        if (ind->numents == 0)
+            ind->pu.ent = entry;
+        ind->numents++;
+        /*
+        ** If this index entry has more than MAXSEARCH flags in
+        ** it, we will split it into subentries to reduce the
+        ** searching.  However, the split doesn't make sense in
+        ** two cases:  (a) if we are already at the end of the
+        ** current affix, or (b) if all the entries in the list
+        ** have identical affixes.  Since the list is sorted, (b)
+        ** is true if the first and last affixes in the list
+        ** are identical.
+        */
+        if (!viazero && ind->numents >= MAXSEARCH
+          && icharcmp (entry->affix, ind->pu.ent->affix) != 0)
+        {
+            /* Sneaky trick:  back up and reprocess */
+            entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
+            i = m_numpflags - (entry - m_pflaglist);
+            ind->pu.fp =
+                static_cast<struct flagptr *>(calloc(SET_SIZE + m_hashheader.nstrchars,
+                  sizeof (struct flagptr)));
+            if (ind->pu.fp == NULL)
+            {
+                fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
+                return (-1);
+            }
+            ind->numents = 0;
+        }
+    }
+#ifdef INDEXDUMP
+    fprintf (stderr, "Prefix index table:\n");
+    dumpindex (m_pflagindex, 0);
+    fprintf (stderr, "Suffix index table:\n");
+    dumpindex (m_sflagindex, 0);
+#endif
+    /*
+    ** String character types: each one is stored in the string table as
+    ** a name, a deformatter name and a list of suffix strings that ends
+    ** with an empty string.
+    */
+    if (m_hashheader.nstrchartype == 0)
+        m_chartypes = NULL;
+    else
+    {
+        m_chartypes = (struct strchartype *)
+            malloc (m_hashheader.nstrchartype * sizeof (struct strchartype));
+        if (m_chartypes == NULL)
+        {
+            fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
+            return (-1);
+        }
+        for (i = 0, nextchar = m_hashheader.strtypestart;
+          i < m_hashheader.nstrchartype;
+          i++)
+        {
+            m_chartypes[i].name = &m_hashstrings[nextchar];
+            nextchar += strlen (m_chartypes[i].name) + 1;
+            m_chartypes[i].deformatter = &m_hashstrings[nextchar];
+            nextchar += strlen (m_chartypes[i].deformatter) + 1;
+            m_chartypes[i].suffixes = &m_hashstrings[nextchar];
+            while (m_hashstrings[nextchar] != '\0')
+                nextchar += strlen (&m_hashstrings[nextchar]) + 1;
+            nextchar++;
+        }
+    }
+
+    initckch(NULL);
+
+    return (0);
+}
+
+/* Free p only when it is non-null.  An earlier definition of FREEP, if any, takes precedence. */
+#ifndef FREEP
+#define FREEP(p) do { if (p) free(p); } while (0)
+#endif
+
+/*!
+ * Fill the m_Try character list and register extra word characters.
+ *
+ * Every character code below SET_SIZE + m_hashheader.nstrchars that is
+ * either a word character which is not lowercase, or a boundary character,
+ * is appended to m_Try.
+ *
+ * If wchars is given it is then parsed as a list of additional word
+ * characters.  A character is taken literally unless it is 'n' (followed
+ * by up to three digits read as a decimal code) or a backslash (followed
+ * by up to three digits read as an octal code).  Each resulting character
+ * that is not yet marked in m_hashheader.wordchars is marked, given the
+ * next sort order value and appended to m_Try.  Parsing stops once
+ * m_Trynum reaches SET_SIZE.
+ *
+ * \param wchars Characters in -w option, if any
+ */
+void ISpellChecker::initckch (char *wchars)
+{
+    ichar_t c;
+    char num[4];
+
+    for (c = 0; c < static_cast<ichar_t>(SET_SIZE+ m_hashheader.nstrchars); ++c)
+    {
+        if (iswordch (c))
+        {
+            if (!mylower (c))
+            {
+                m_Try[m_Trynum] = c;
+                ++m_Trynum;
+            }
+        }
+        else if (isboundarych (c))
+        {
+            m_Try[m_Trynum] = c;
+            ++m_Trynum;
+        }
+    }
+    if (wchars != NULL)
+    {
+        while (m_Trynum < SET_SIZE && *wchars != '\0')
+        {
+            if (*wchars != 'n' && *wchars != '\\')
+            {
+                /*
+                ** Go through unsigned char so that a character with the
+                ** high bit set is not sign-extended into a huge ichar_t,
+                ** which would index far outside the header tables below.
+                */
+                c = static_cast<unsigned char>(*wchars);
+                ++wchars;
+            }
+            else
+            {
+                ++wchars;
+                num[0] = '\0';
+                num[1] = '\0';
+                num[2] = '\0';
+                num[3] = '\0';
+                /* isdigit() is only defined for unsigned char values (and EOF). */
+                if (isdigit (static_cast<unsigned char>(wchars[0])))
+                {
+                    num[0] = wchars[0];
+                    if (isdigit (static_cast<unsigned char>(wchars[1])))
+                    {
+                        num[1] = wchars[1];
+                        if (isdigit (static_cast<unsigned char>(wchars[2])))
+                            num[2] = wchars[2];
+                    }
+                }
+                if (wchars[-1] == 'n')
+                {
+                    /* 'n' prefix: decimal character code */
+                    wchars += strlen (num);
+                    c = atoi (num);
+                }
+                else
+                {
+                    /* backslash prefix: octal character code */
+                    wchars += strlen (num);
+                    c = 0;
+                    if (num[0])
+                        c = num[0] - '0';
+                    if (num[1])
+                    {
+                        c <<= 3;
+                        c += num[1] - '0';
+                    }
+                    if (num[2])
+                    {
+                        c <<= 3;
+                        c += num[2] - '0';
+                    }
+                }
+            }
+/*          c &= NOPARITY;*/
+            if (!m_hashheader.wordchars[c])
+            {
+                m_hashheader.wordchars[c] = 1;
+                m_hashheader.sortorder[c] = m_hashheader.sortval++;
+                m_Try[m_Trynum] = c;
+                ++m_Trynum;
+            }
+        }
+    }
+}
+
+/*!
+ * Recursively free the sub-tables hanging off an affix index table.
+ *
+ * A slot whose numents is 0 and whose pu.fp is non-NULL refers to a
+ * sub-table of the same size (see the calloc calls in linit).  Such a
+ * sub-table is cleared first and then freed.  The slot's pointer is reset
+ * to NULL afterwards so that running clearindex again on the same table
+ * cannot free the same memory twice.
+ *
+ * \param indexp first slot of a table holding
+ *               SET_SIZE + m_hashheader.nstrchars entries
+ */
+void ISpellChecker::clearindex (struct flagptr *indexp)
+{
+    int i;
+
+    for (i = 0; i < SET_SIZE + m_hashheader.nstrchars; i++, indexp++)
+    {
+        if (indexp->numents == 0 && indexp->pu.fp != NULL)
+        {
+            clearindex(indexp->pu.fp);
+            free(indexp->pu.fp);
+            indexp->pu.fp = NULL;
+        }
+    }
+}
+
+#ifdef INDEXDUMP
+/*
+ * Debugging aid: print an affix index table to stderr, recursing into
+ * sub-tables.  Each line is indented by "depth" spaces.  A slot index is
+ * shown as a character when it lies between ' ' and '~', otherwise in hex.
+ *
+ * indexp - first slot of the table to print
+ * depth  - current nesting level (0 for a top-level table)
+ *
+ * NOTE(review): this is still a K&R-style definition and refers to
+ * "hashheader" and "sflaglist" rather than the m_-prefixed members used in
+ * the rest of the file; it looks like it will not build if INDEXDUMP is
+ * ever defined -- confirm before enabling.
+ */
+static void dumpindex (indexp, depth)
+ register struct flagptr * indexp;
+ register int depth;
+{
+ register int i;
+ int j;
+ int k;
+ char stripbuf[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
+
+ for (i = 0; i < SET_SIZE + hashheader.nstrchars; i++, indexp++)
+ {
+ /* numents == 0 with a non-NULL pointer marks a sub-table: print the key and recurse */
+ if (indexp->numents == 0 && indexp->pu.fp != NULL)
+ {
+ for (j = depth; --j >= 0; )
+ putc (' ', stderr);
+ if (i >= ' ' && i <= '~')
+ putc (i, stderr);
+ else
+ fprintf (stderr, "0x%x", i);
+ putc ('\n', stderr);
+ dumpindex (indexp->pu.fp, depth + 1);
+ }
+ /* otherwise a non-zero numents means the slot holds a run of flag entries */
+ else if (indexp->numents)
+ {
+ for (j = depth; --j >= 0; )
+ putc (' ', stderr);
+ if (i >= ' ' && i <= '~')
+ putc (i, stderr);
+ else
+ fprintf (stderr, "0x%x", i);
+ fprintf (stderr, " -> %d entries\n", indexp->numents);
+ for (k = 0; k < indexp->numents; k++)
+ {
+ for (j = depth; --j >= 0; )
+ putc (' ', stderr);
+ /* entries with a strip string are printed as (-strip,affix) */
+ if (indexp->pu.ent[k].stripl)
+ {
+ ichartostr (stripbuf, indexp->pu.ent[k].strip,
+ sizeof stripbuf, 1);
+ fprintf (stderr, " entry %d (-%s,%s)\n",
+ &indexp->pu.ent[k] - sflaglist,
+ stripbuf,
+ indexp->pu.ent[k].affl
+ ? ichartosstr (indexp->pu.ent[k].affix, 1) : "-");
+ }
+ else
+ fprintf (stderr, " entry %d (%s)\n",
+ &indexp->pu.ent[k] - sflaglist,
+ ichartosstr (indexp->pu.ent[k].affix, 1));
+ }
+ }
+ }
+}
+#endif
+
+/*!
+ * Look a word up in the loaded hash table.
+ *
+ * The word is converted to its external (char) form and compared against
+ * the entries of the hash chain selected by hash (s, m_hashsize).  If the
+ * conversion reports an overflow, WORD_TOO_LONG is printed to stderr and
+ * the search still goes ahead with the converted text.
+ *
+ * \param s       word to look up, as an ichar_t string
+ * \param dotree  not used by this implementation
+ *
+ * \return the matching entry, or NULL when the chain holds no entry whose
+ *         word equals the converted text
+ */
+struct dent * ISpellChecker::ispell_lookup (ichar_t *s, int dotree)
+{
+    char external[INPUTWORDLEN + MAXAFFIXLEN];
+    struct dent * candidate = &m_hashtbl[hash (s, m_hashsize)];
+
+    if (ichartostr (external, s, sizeof external, 1))
+        fprintf (stderr, WORD_TOO_LONG (external));
+
+    while (candidate != NULL)
+    {
+        char * stored = candidate->word;
+
+        /* Compare the first character by hand, then the remainder with strcmp. */
+        if (stored && stored[0] == external[0]
+          && strcmp (stored + 1, external + 1) == 0)
+            return candidate;
+#ifndef NO_CAPITALIZATION_SUPPORT
+        /* Step over the capitalization variants attached to this entry. */
+        while (candidate->flagfield & MOREVARIANTS)
+            candidate = candidate->next;
+#endif
+        candidate = candidate->next;
+    }
+    return NULL;
+}
+
+/*!
+ * Set up per-checker state.  At present this only resets m_translate_in to 0.
+ */
+void ISpellChecker::alloc_ispell_struct()
+{
+ m_translate_in = 0;
+}
+
+/*!
+ * Counterpart of alloc_ispell_struct().  Currently does nothing.
+ */
+void ISpellChecker::free_ispell_struct()
+{
+}
diff --git a/kspell2/plugins/ispell/makedent.cpp b/kspell2/plugins/ispell/makedent.cpp
new file mode 100644
index 000000000..9c168dc17
--- /dev/null
+++ b/kspell2/plugins/ispell/makedent.cpp
@@ -0,0 +1,972 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+/*
+ * Copyright 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ * 4. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgment:
+ * This product includes software developed by Geoff Kuenning and
+ * other unpaid contributors.
+ * 5. The name of Geoff Kuenning may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Log$
+ * Revision 1.2 2004/02/01 04:46:46 zrusin
+ * Both ispell and aspell plugins are not working properly. We can start switching.
+ *
+ * Revision 1.1 2004/01/31 16:44:12 zrusin
+ * ISpell plugin.
+ *
+ * Revision 1.4 2003/08/14 17:51:28 dom
+ * update license - exception clause should be Lesser GPL
+ *
+ * Revision 1.3 2003/07/28 20:40:27 dom
+ * fix up the license clause, further win32-registry proof some directory getting functions
+ *
+ * Revision 1.2 2003/07/16 22:52:49 dom
+ * LGPL + exception license
+ *
+ * Revision 1.1 2003/07/15 01:15:08 dom
+ * ispell enchant backend
+ *
+ * Revision 1.3 2003/02/12 02:10:38 hippietrail
+ *
+ * C casts -> C++ casts
+ * Improved const-correctness due to changing casts
+ * Fixed some warnings
+ *
+ * Revision 1.2 2003/01/29 05:50:12 hippietrail
+ *
+ * Fixed my mess in EncodingManager.
+ * Changed many C casts to C++ casts.
+ *
+ * Revision 1.1 2003/01/24 05:52:35 hippietrail
+ *
+ * Refactored ispell code. Old ispell global variables had been put into
+ * an allocated structure, a pointer to which was passed to many functions.
+ * I have now made all such functions and variables private members of the
+ * ISpellChecker class. It was C OO, now it's C++ OO.
+ *
+ * I've fixed the makefiles and tested compilation but am unable to test
+ * operation. Please back out my changes if they cause problems which
+ * are not obvious or easy to fix.
+ *
+ * Revision 1.8 2003/01/06 18:48:40 dom
+ * ispell cleanup, start of using new 'add' save features
+ *
+ * Revision 1.7 2003/01/04 19:09:04 dom
+ * some tidying... bug pissing me off...
+ *
+ * Revision 1.6 2002/09/19 05:31:18 hippietrail
+ *
+ * More Ispell cleanup. Conditional globals and DEREF macros are removed.
+ * K&R function declarations removed, converted to Doxygen style comments
+ * where possible. No code has been changed (I hope). Compiles for me but
+ * unable to test.
+ *
+ * Revision 1.5 2002/09/17 03:03:30 hippietrail
+ *
+ * After seeking permission on the developer list I've reformatted all the
+ * spelling source which seemed to have parts which used 2, 3, 4, and 8
+ * spaces for tabs. It should all look good with our standard 4-space
+ * tabs now.
+ * I've concentrated just on indentation in the actual code. More prettying
+ * could be done.
+ * * NO code changes were made *
+ *
+ * Revision 1.4 2002/09/13 17:20:13 mpritchett
+ * Fix more warnings for Linux build
+ *
+ * Revision 1.3 2002/03/22 14:31:57 dom
+ * fix mg's compile problem
+ *
+ * Revision 1.2 2001/05/12 16:05:42 thomasf
+ * Big pseudo changes to ispell to make it pass around a structure rather
+ * than rely on all sorts of gloabals willy nilly here and there. Also
+ * fixed our spelling class to work with accepting suggestions once more.
+ * This code is dirty, gross and ugly (not to mention still not supporting
+ * multiple hash sized just yet) but it works on my machine and will no
+ * doubt break other machines.
+ *
+ * Revision 1.1 2001/04/15 16:01:24 tomas_f
+ * moving to spell/xp
+ *
+ * Revision 1.6 1999/12/21 18:46:29 sterwill
+ * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se>
+ *
+ * Revision 1.5 1999/10/20 03:19:35 paul
+ * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more.
+ *
+ * Revision 1.4 1999/04/13 17:12:51 jeff
+ * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
+ * Fixed crash on Win32 with the new code.
+ *
+ * Revision 1.3 1998/12/29 14:55:33 eric
+ *
+ * I've doctored the ispell code pretty extensively here. It is now
+ * warning-free on Win32. It also *works* on Win32 now, since I
+ * replaced all the I/O calls with ANSI standard ones.
+ *
+ * Revision 1.3 1998/12/29 14:55:33 eric
+ *
+ * I've doctored the ispell code pretty extensively here. It is now
+ * warning-free on Win32. It also *works* on Win32 now, since I
+ * replaced all the I/O calls with ANSI standard ones.
+ *
+ * Revision 1.2 1998/12/28 23:11:30 eric
+ *
+ * modified spell code and integration to build on Windows.
+ * This is still a hack.
+ *
+ * Actually, it doesn't yet WORK on Windows. It just builds.
+ * SpellCheckInit is failing for some reason.
+ *
+ * Revision 1.1 1998/12/28 18:04:43 davet
+ * Spell checker code stripped from ispell. At this point, there are
+ * two external routines... the Init routine, and a check-a-word routine
+ * which returns a boolean value, and takes a 16 bit char string.
+ * The code resembles the ispell code as much as possible still.
+ *
+ * Revision 1.45 1994/12/27 23:08:52 geoff
+ * Add code to makedent to reject words that contain non-word characters.
+ * This helps protect people who use ISO 8-bit characters when ispell
+ * isn't configured for that option.
+ *
+ * Revision 1.44 1994/10/25 05:46:20 geoff
+ * Fix some incorrect declarations in the lint versions of some routines.
+ *
+ * Revision 1.43 1994/09/16 03:32:34 geoff
+ * Issue an error message for bad affix flags
+ *
+ * Revision 1.42 1994/02/07 04:23:43 geoff
+ * Correctly identify the deformatter when changing file types
+ *
+ * Revision 1.41 1994/01/25 07:11:55 geoff
+ * Get rid of all old RCS log lines in preparation for the 3.1 release.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "ispell_checker.h"
+#include "msgs.h"
+
+/*
+ * Forward declarations.  Those inside comments are kept for reference only.
+ * NOTE(review): makedent and toutent are declared here but no definition is
+ * visible in this part of the file -- confirm whether these prototypes are
+ * still needed.
+ */
+int makedent P ((char * lbuf, int lbuflen, struct dent * ent));
+/*int combinecaps P ((struct dent * hdr, struct dent * newent));
+#ifndef NO_CAPITALIZATION_SUPPORT
+static void forcevheader P ((struct dent * hdrp, struct dent * oldp,
+ struct dent * newp));
+#endif / * NO_CAPITALIZATION_SUPPORT * /
+static int combine_two_entries P ((struct dent * hdrp,
+ struct dent * oldp, struct dent * newp));
+static int acoversb P ((struct dent * enta, struct dent * entb));
+*/
+/*static int issubset P ((struct dent * ent1, struct dent * ent2));
+static void combineaffixes P ((struct dent * ent1, struct dent * ent2));*/
+
+void toutent P ((FILE * outfile, struct dent * hent,
+ int onlykeep));
+/*static void toutword P ((FILE * outfile, char * word,
+ struct dent * cent));
+static void flagout P ((FILE * outfile, int flag));
+*/
+/* ichar_t string helpers, declared only when ICHAR_IS_CHAR is not defined. */
+#ifndef ICHAR_IS_CHAR
+ichar_t * icharcpy P ((ichar_t * out, ichar_t * in));
+int icharlen P ((ichar_t * str));
+int icharcmp P ((ichar_t * s1, ichar_t * s2));
+int icharncmp P ((ichar_t * s1, ichar_t * s2, int n));
+#endif /* ICHAR_IS_CHAR */
+
+/*static int has_marker;*/
+
+/*
+ * Fill in a directory entry, including setting the capitalization flags, and
+ * allocate and initialize memory for the d->word field. Returns -1
+ * if there was trouble. The input word must be in canonical form.
+int makedent (lbuf, lbuflen, d)
+This function is not used by AbiWord. I don't know if it'll be needed for
+other abi documents
+ */
+
+#ifndef NO_CAPITALIZATION_SUPPORT
+/*!
+** Work out which capitalization class a word belongs to.
+**
+** - ALLCAPS:     the word contains no lowercase character (this includes
+**                an empty word).
+** - FOLLOWCASE:  an uppercase character appears after the first lowercase
+**                one, or the word starts with an uppercase character and
+**                has another uppercase character later on.
+** - CAPITALIZED: only the first character is uppercase.
+** - ANYCASE:     the first character is not uppercase and no uppercase
+**                character follows the first lowercase one.
+**
+** \param word  word to classify, as a zero-terminated ichar_t string
+**
+** \return one of ANYCASE, ALLCAPS, CAPITALIZED or FOLLOWCASE
+*/
+long
+ISpellChecker::whatcap (ichar_t *word)
+{
+    ichar_t * scan = word;
+
+    /* Advance to the first lowercase character, if there is one. */
+    while (*scan && !mylower (*scan))
+        scan++;
+    if (*scan == '\0')
+        return ALLCAPS;
+
+    /* From there on, advance to the next uppercase character. */
+    while (*scan && !myupper (*scan))
+        scan++;
+    if (*scan != '\0')
+        return FOLLOWCASE; /* .../lower/upper */
+
+    /*
+    ** No uppercase character follows the lowercase ones, so the
+    ** answer depends on the start of the word only.
+    */
+    if (!myupper (word[0]))
+        return ANYCASE;
+
+    for (scan = word + 1; *scan != '\0'; scan++)
+    {
+        if (myupper (*scan))
+            return FOLLOWCASE;
+    }
+    return CAPITALIZED;
+}
+
+/*!
+** Turn an entry into a variant-capitalization header.
+**
+** A copy of *dp is allocated and linked in as dp->next.  The copy keeps
+** its own duplicate of the word only when the entry is FOLLOWCASE; for
+** every other capitalization type its word pointer is NULL.  dp itself
+** then becomes the header: its word is uppercased in place and its
+** capitalization bits are replaced by ALLCAPS | MOREVARIANTS.
+**
+** \param dp Entry to update
+**
+** \return 0 if all was ok, -1 if allocation error (dp is left untouched
+**         in that case).
+*/
+int ISpellChecker::addvheader ( struct dent *dp)
+{
+    struct dent * variant = static_cast<struct dent *>(malloc(sizeof (struct dent)));
+
+    if (variant == NULL)
+    {
+        fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word);
+        return -1;
+    }
+
+    *variant = *dp;
+    if (captype (variant->flagfield) == FOLLOWCASE)
+    {
+        /* Followcase words need a copy of the capitalization */
+        variant->word = static_cast<char *>(malloc (static_cast<unsigned int>(strlen(dp->word)) + 1));
+        if (variant->word == NULL)
+        {
+            fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word);
+            free (reinterpret_cast<char *>(variant));
+            return -1;
+        }
+        strcpy (variant->word, dp->word);
+    }
+    else
+    {
+        variant->word = NULL;
+    }
+
+    /* dp becomes the dummy header entry in front of the copy. */
+    chupcase (dp->word);
+    dp->next = variant;
+    dp->flagfield &= ~CAPTYPEMASK;
+    dp->flagfield |= (ALLCAPS | MOREVARIANTS);
+    return 0;
+}
+#endif /* NO_CAPITALIZATION_SUPPORT */
+
+/*
+** Combine and resolve the entries describing two capitalizations of the same
+** word. This may require allocating yet more entries.
+**
+** Hdrp is a pointer into a hash table. If the word covered by hdrp has
+** variations, hdrp must point to the header. Newp is a pointer to temporary
+** storage, and space is malloc'ed if newp is to be kept. The newp->word
+** field must have been allocated with mymalloc, so that this routine may free
+** the space if it keeps newp but not the word.
+**
+** Return value: 0 if the word was added, 1 if the word was combined
+** with an existing entry, and -1 if trouble occurred (e.g., malloc).
+** If 1 is returned, newp->word may have been freed using myfree.
+**
+** Life is made much more difficult by the KEEP flag's possibilities. We
+** must ensure that a !KEEP word doesn't find its way into the personal
+** dictionary as a result of this routine's actions. However, a !KEEP
+** word that has affixes must have come from the main dictionary, so it
+** is acceptable to combine entries in that case (got that?).
+**
+** The net result of all this is a set of rules that is a bloody pain
+** to figure out. Basically, we want to choose one of the following actions:
+**
+** (1) Add newp's affixes and KEEP flag to oldp, and discard newp.
+** (2) Add oldp's affixes and KEEP flag to newp, replace oldp with
+** newp, and discard newp.
+#ifndef NO_CAPITALIZATION_SUPPORT
+** (3) Insert newp as a new entry in the variants list. If there is
+** currently no variant header, this requires adding one. Adding a
+** header splits into two sub-cases:
+**
+** (3a) If oldp is ALLCAPS and the KEEP flags match, just turn it
+** into the header.
+** (3b) Otherwise, add a new entry to serve as the header.
+** To ease list linking, this is done by copying oldp into
+** the new entry, and then performing (3a).
+**
+** After newp has been added as a variant, its affixes and KEEP
+** flag are OR-ed into the variant header.
+#endif
+**
+** So how to choose which? The default is always case (3), which adds newp
+** as a new entry in the variants list. Cases (1) and (2) are symmetrical
+** except for which entry is discarded. We can use case (1) or (2) whenever
+** one entry "covers" the other. "Covering" is defined as follows:
+**
+** (4) For entries with matching capitalization types, A covers B
+** if:
+**
+** (4a) B's affix flags are a subset of A's, or the KEEP flags
+** match, and
+** (4b) either the KEEP flags match, or A's KEEP flag is set.
+** (Since A has more suffixes, combining B with it won't
+** cause any extra suffixes to be added to the dictionary.)
+** (4c) If the words are FOLLOWCASE, the capitalizations match
+** exactly.
+**
+#ifndef NO_CAPITALIZATION_SUPPORT
+** (5) For entries with mismatched capitalization types, A covers B
+** if (4a) and (4b) are true, and:
+**
+** (5a) B is ALLCAPS, or
+** (5b) A is ANYCASE, and B is CAPITALIZED.
+#endif
+**
+** For any "hdrp" without variants, oldp is the same as hdrp. Otherwise,
+** the above tests are applied using each variant in turn for oldp.
+int combinecaps (hdrp, newp)
+static void forcevheader (hdrp, oldp, newp)
+static int combine_two_entries (hdrp, oldp, newp)
+static int acoversb (enta, entb)
+*/
+
+/*!
+ * Convert an ichar_t string to uppercase in place, applying mytoupper to
+ * every character up to the terminating zero.
+ *
+ * \param s  zero-terminated string to convert
+ */
+void
+ISpellChecker::upcase (ichar_t *s)
+{
+    for ( ; *s; ++s)
+        *s = mytoupper (*s);
+}
+
+/*!
+ * Convert an ichar_t string to lowercase in place, applying mytolower to
+ * every character up to the terminating zero.
+ *
+ * \param s  zero-terminated string to convert
+ */
+void
+ISpellChecker::lowcase (ichar_t *s)
+{
+    for ( ; *s; ++s)
+        *s = mytolower (*s);
+}
+
+/*!
+ * Uppercase an ordinary (char) string in place.  The string is converted
+ * to ichar_t form with strtosichar, uppercased with upcase and written
+ * back over the input, using at most strlen (s) + 1 bytes.  This is a lot
+ * slower than calling upcase directly.  The input must be in canonical
+ * form.
+ *
+ * \param s  string to convert
+ */
+void
+ISpellChecker::chupcase (char *s)
+{
+    ichar_t * const wide = strtosichar (s, 1);
+
+    upcase (wide);
+
+    /* Never write beyond the space the original string occupied. */
+    const int capacity = static_cast<int>(strlen (s) + 1);
+    ichartostr (s, wide, capacity, 1);
+}
+
+/*
+** See if one affix field is a subset of another. Returns NZ if ent1
+** is a subset of ent2. The KEEP flag is not taken into consideration.
+static int issubset (ent1, ent2)
+static void combineaffixes (ent1, ent2)
+*/
+
+/*
+** Write out a dictionary entry, including capitalization variants.
+** If onlykeep is true, only those variants with KEEP set will be
+** written.
+Removed -- not used by Abiword
+void toutent_ (toutfile, hent, onlykeep)
+static void toutword (toutfile, word, cent)
+static void flagout (toutfile, flag)
+*/
+
+/*!
+ * If the string under the given pointer begins with a string character,
+ * return the length of that "character". If not, return 0.
+ * May be called any time, but it's best if "isstrstart" is first
+ * used to filter out unnecessary calls.
+ *
+ * The candidates are the first m_hashheader.nstrchars rows of
+ * m_hashheader.stringchars, which are searched by bisection. A row only
+ * counts as a match when its dupnos value equals the wanted duplicate
+ * number: 0 for canonical input, m_defdupchar otherwise.
+ * NOTE(review): the bisection presumably relies on the rows being sorted
+ * by text and then by dupnos -- confirm against the code that builds the
+ * hash file.
+ *
+ * As a side effect, "m_laststringch" is set to the number of the string
+ * found (taken from m_hashheader.stringdups), or to -1 if none was found.
+ * This can be useful for such things as case conversion.
+ *
+ * \param bufp text to examine
+ * \param canonical NZ if input is in canonical form
+ *
+ * \return length in bytes of the string character at bufp, or 0 if there
+ * is none
+ */
+int
+ISpellChecker::stringcharlen (char *bufp, int canonical)
+{
+#ifdef SLOWMULTIPLY
+ static char * sp[MAXSTRINGCHARS];
+ static int inited = 0;
+#endif /* SLOWMULTIPLY */
+ register char * bufcur;
+ register char * stringcur;
+ register int stringno;
+ register int lowstringno;
+ register int highstringno;
+ int dupwanted;
+
+#ifdef SLOWMULTIPLY
+ /*
+ ** Cache the row addresses once.
+ ** NOTE(review): this branch refers to "hashheader", not "m_hashheader",
+ ** and may not build when SLOWMULTIPLY is defined -- confirm.
+ */
+ if (!inited)
+ {
+ inited = 1;
+ for (stringno = 0; stringno < MAXSTRINGCHARS; stringno++)
+ sp[stringno] = &hashheader.stringchars[stringno][0];
+ }
+#endif /* SLOWMULTIPLY */
+ lowstringno = 0;
+ highstringno = m_hashheader.nstrchars - 1;
+ dupwanted = canonical ? 0 : m_defdupchar;
+ while (lowstringno <= highstringno)
+ {
+ stringno = (lowstringno + highstringno) >> 1;
+#ifdef SLOWMULTIPLY
+ stringcur = sp[stringno];
+#else /* SLOWMULTIPLY */
+ stringcur = &m_hashheader.stringchars[stringno][0];
+#endif /* SLOWMULTIPLY */
+ bufcur = bufp;
+ /* Compare the buffer with this row, stopping at the first difference. */
+ while (*stringcur)
+ {
+#ifdef NO8BIT
+ if (((*bufcur++ ^ *stringcur) & 0x7F) != 0)
+#else /* NO8BIT */
+ if (*bufcur++ != *stringcur)
+#endif /* NO8BIT */
+ break;
+ /*
+ ** We can't use autoincrement above because of the
+ ** test below.
+ */
+ stringcur++;
+ }
+ if (*stringcur == '\0')
+ {
+ if (m_hashheader.dupnos[stringno] == dupwanted)
+ {
+ /* We have a match */
+ m_laststringch = m_hashheader.stringdups[stringno];
+#ifdef SLOWMULTIPLY
+ return stringcur - sp[stringno];
+#else /* SLOWMULTIPLY */
+ return stringcur - &m_hashheader.stringchars[stringno][0];
+#endif /* SLOWMULTIPLY */
+ }
+ else
+ /*
+ ** The text matched but the duplicate number did not. Step back to
+ ** the last matched character so the comparisons below see equal
+ ** characters and fall through to the dupnos test.
+ */
+ --stringcur;
+ }
+ /* No match - choose which side to search on */
+#ifdef NO8BIT
+ if ((*--bufcur & 0x7F) < (*stringcur & 0x7F))
+ highstringno = stringno - 1;
+ else if ((*bufcur & 0x7F) > (*stringcur & 0x7F))
+ lowstringno = stringno + 1;
+#else /* NO8BIT */
+ if (*--bufcur < *stringcur)
+ highstringno = stringno - 1;
+ else if (*bufcur > *stringcur)
+ lowstringno = stringno + 1;
+#endif /* NO8BIT */
+ else if (dupwanted < m_hashheader.dupnos[stringno])
+ highstringno = stringno - 1;
+ else
+ lowstringno = stringno + 1;
+ }
+ m_laststringch = static_cast<unsigned int>(-1);
+ return 0; /* Not a string character */
+}
+
+/* MACROS CONVERTED TO FUNCTIONS
+** These macros are similar to the ones above, but they take into account
+** the possibility of string characters. Note well that they take a POINTER,
+** not a character.
+**
+** The "l_" versions set "len" to the length of the string character as a
+** handy side effect. (Note that the global "laststringch" is also set,
+** and sometimes used, by these macros.)
+**
+** The "l1_" versions go one step further and guarantee that the "len"
+** field is valid for *all* characters, being set to 1 even if the macro
+** returns false. This macro is a great example of how NOT to write
+** readable C.
+*/
+/*
+** isstringch: true when *ptr passes isstringstart and stringcharlen
+** reports a length greater than 0. It expands to an unqualified call to
+** stringcharlen, so it can only be used where that name resolves.
+*/
+#define isstringch(ptr, canon) (isstringstart (*(ptr)) \
+ && stringcharlen ((ptr), (canon)) > 0)
+/*
+int isstringch(char *ptr, int canon) {
+ return (isstringstart (*(ptr)) && (len = stringcharlen ((ptr), (canon))) > 0);
+}
+*/
+
+/*
+** l_isstringch: like isstringch, but also stores the result of
+** stringcharlen in "len". "len" is left untouched when isstringstart
+** fails, because the && short-circuits.
+*/
+#define l_isstringch(ptr, len, canon) \
+ (isstringstart (*(ptr)) \
+ && (len = stringcharlen ((ptr), (canon))) \
+ > 0)
+/*
+int l_isstringch(char *ptr, int len, int canon) {
+ return (isstringstart (*(ptr)) && (len = stringcharlen ((ptr), (canon))) > 0);
+}
+*/
+
+/*
+** l1_isstringch: like l_isstringch, but "len" is always valid afterwards.
+** It is set to 1 up front, to the string character's length when one is
+** found (the macro then yields 1), and back to 1 when stringcharlen
+** returns 0 (the macro then yields 0).
+*/
+#define l1_isstringch(ptr, len, canon) \
+ (len = 1, \
+ isstringstart ((unsigned char)(*(ptr))) \
+ && ((len = \
+ stringcharlen ((ptr), (canon))) \
+ > 0 \
+ ? 1 : (len = 1, 0)))
+/*
+int l1_isstringch(char *ptr, int len, int canon) {
+ return (len = 1, isstringstart ((unsigned char)(*(ptr))) &&
+ ((len = stringcharlen ((ptr), (canon))) > 0 ? 1 : (len = 1, 0)));
+}
+*/
+
+/*** END MACRO CONVERSION ***/
+
+/*!
+ * Translate an external (char) string into ichar_t form.  A multi-byte
+ * string character recognised by l1_isstringch becomes the single value
+ * SET_SIZE + m_laststringch; any other byte is copied as its unsigned
+ * char value.  The result is always zero-terminated.
+ *
+ * \param out        Where to put result
+ * \param in         String to convert
+ * \param outlen     Size of output buffer, *BYTES*
+ * \param canonical  NZ if input is in canonical form
+ *
+ * \return NZ if the output string overflowed.
+ */
+int
+ISpellChecker::strtoichar (ichar_t *out, char *in, int outlen, int canonical)
+{
+    int step = 1; /* Bytes taken up by the current input character */
+    int room = static_cast<int>(outlen / sizeof (ichar_t)); /* Capacity in ichar_t units */
+
+    while (--room > 0 && *in != '\0')
+    {
+        ichar_t converted;
+
+        if (l1_isstringch (in, step, canonical))
+            converted = SET_SIZE + m_laststringch;
+        else
+            converted = static_cast<unsigned char>(*in);
+        *out++ = converted;
+        in += step;
+    }
+    *out = 0;
+    return room <= 0;
+}
+
+/*!
+ * Convert an ichar_t string to an external string.
+ *
+ * WARNING: the resulting string may wind up being longer than the
+ * original. In fact, even the sequence strtoichar->ichartostr may
+ * produce a result longer than the original, because the output form
+ * may use a different string type set than the original input form.
+ *
+ * Values below SET_SIZE are emitted as a single byte. Larger values are
+ * string characters: SET_SIZE is subtracted and the matching entry of
+ * m_hashheader.stringchars is copied out. In non-canonical mode the entry
+ * is first remapped to the one whose dupnos[] equals m_defdupchar and whose
+ * stringdups[] equals the canonical index, if such an entry exists.
+ *
+ * A string character is only copied when it fits completely (including
+ * room for the final NUL); otherwise conversion stops in front of it and
+ * overflow is reported. The output is always NUL-terminated.
+ *
+ * \param out Where to put result
+ * \param in String to convert
+ * \param outlen Size of output buffer, bytes
+ * \param canonical NZ for canonical form
+ *
+ * \return NZ if the output string overflowed.
+ */
+int
+ISpellChecker::ichartostr ( char *out, ichar_t *in, int outlen, int canonical)
+{
+    int ch;                         /* Next character to store */
+
+    /*
+     * Invariant at the loop test: outlen is the number of bytes still free
+     * at out. After the pre-decrement it is the number of bytes that remain
+     * free once ONE more byte has been written (>= 1, for the NUL).
+     */
+    while (--outlen > 0 && (ch = *in++) != 0)
+    {
+        if (ch < SET_SIZE)
+        {
+            *out++ = static_cast<char>(ch);
+            continue;
+        }
+
+        ch -= SET_SIZE;
+        if (!canonical)
+        {
+            /* Look for the variant of this string char in the default set */
+            for (int i = m_hashheader.nstrchars; --i >= 0; )
+            {
+                if (m_hashheader.dupnos[i] == m_defdupchar
+                    && (static_cast<int>(m_hashheader.stringdups[i])) == ch)
+                {
+                    ch = i;
+                    break;
+                }
+            }
+        }
+
+        const char *scharp = m_hashheader.stringchars[static_cast<unsigned>(ch)];
+        const int sclen = static_cast<int>(strlen (scharp));
+
+        /*
+         * outlen + 1 bytes are free here; sclen bytes plus the NUL are
+         * needed. Refuse to write a string character that does not fit
+         * instead of running past the end of the caller's buffer.
+         */
+        if (sclen > outlen)
+        {
+            outlen = 0;             /* Makes the return value report overflow */
+            break;
+        }
+        memcpy (out, scharp, sclen);
+        out += sclen;
+        if (sclen > 1)
+            outlen -= sclen - 1;    /* First byte was already accounted for */
+    }
+    *out = '\0';
+    return outlen <= 0;
+}
+
+/*!
+ * Convenience wrapper around strtoichar() that converts into a buffer
+ * owned by this function. If strtoichar() reports overflow, a
+ * WORD_TOO_LONG message is written to stderr.
+ *
+ * \param in String to convert
+ * \param canonical NZ if input is in canonical form
+ *
+ * \return Pointer to a function-local static buffer of STRTOSICHAR_SIZE
+ *         bytes; its contents are replaced by the next call.
+ */
+ichar_t *
+ISpellChecker::strtosichar ( char *in, int canonical)
+{
+    static ichar_t converted[STRTOSICHAR_SIZE / sizeof (ichar_t)];
+    const int overflowed = strtoichar (converted, in, sizeof converted, canonical);
+
+    if (overflowed != 0)
+        fprintf (stderr, WORD_TOO_LONG (in));
+    return converted;
+}
+
+/*!
+ * Convenience wrapper around ichartostr() that converts into a buffer
+ * owned by this function. If ichartostr() reports overflow, a
+ * WORD_TOO_LONG message (showing the truncated result) is written to
+ * stderr.
+ *
+ * \param in Internal string to convert
+ * \param canonical NZ for canonical conversion
+ *
+ * \return Pointer to a function-local static buffer of ICHARTOSSTR_SIZE
+ *         bytes; its contents are replaced by the next call.
+ */
+char *
+ISpellChecker::ichartosstr (ichar_t *in, int canonical)
+{
+    static char converted[ICHARTOSSTR_SIZE];
+    const int overflowed = ichartostr (converted, in, sizeof converted, canonical);
+
+    if (overflowed != 0)
+        fprintf (stderr, WORD_TOO_LONG (converted));
+    return converted;
+}
+
+/*!
+ * Convert a single ichar to a printable string, storing the result in
+ * a static area.
+ *
+ * \param in Character value; values below SET_SIZE are emitted as one
+ *           byte, anything else is looked up in m_hashheader.stringchars
+ *           at index (in - SET_SIZE).
+ *
+ * \return Pointer to a function-local static buffer of
+ *         MAXSTRINGCHARLEN + 1 bytes; overwritten by the next call.
+ */
+char *
+ISpellChecker::printichar (int in)
+{
+    static char text[MAXSTRINGCHARLEN + 1];
+
+    if (in >= SET_SIZE)
+    {
+        /* String character: copy its external spelling */
+        strcpy (text, m_hashheader.stringchars[static_cast<unsigned>(in) - SET_SIZE]);
+        return text;
+    }
+
+    text[0] = static_cast<char>(in);
+    text[1] = '\0';
+    return text;
+}
+
+#ifndef ICHAR_IS_CHAR
+/*!
+ * Copy a zero-terminated ichar_t string, terminator included.
+ *
+ * \param out Destination
+ * \param in Source
+ *
+ * \return The original value of \a out.
+ */
+ichar_t *
+icharcpy (ichar_t *out, ichar_t *in)
+{
+    ichar_t *dst = out;             /* Write cursor; out is kept for the return */
+
+    for (;;)
+    {
+        const ichar_t c = *in++;
+
+        *dst++ = c;
+        if (c == 0)
+            break;
+    }
+    return out;
+}
+
+/*!
+ * Return the length of a zero-terminated ichar_t string.
+ *
+ * \param in String to count
+ *
+ * \return Number of elements in front of the terminating zero.
+ */
+int
+icharlen (ichar_t * in)
+{
+    int count = 0;                  /* Elements seen so far */
+
+    while (in[count] != 0)
+        ++count;
+    return count;
+}
+
+/*!
+ * Compare two zero-terminated ichar_t strings.
+ *
+ * \param s1 First string
+ * \param s2 Second string
+ *
+ * \return Zero if the strings are equal; otherwise *s1 - *s2 taken at the
+ *         first position where they differ (or where \a s1 ends).
+ */
+int
+icharcmp (ichar_t * s1, ichar_t * s2)
+{
+    /* Walk both strings while s1 has not ended and the elements agree */
+    for ( ; *s1 != 0 && *s1 == *s2; ++s1, ++s2)
+        ;
+    return *s1 - *s2;
+}
+
+/*!
+ * Strncmp for two ichar_t's: compare at most \a n elements.
+ *
+ * \param s1 First string
+ * \param s2 Second string
+ * \param n Maximum number of elements to compare
+ *
+ * \return Zero if the first \a n elements agree (or n <= 0); otherwise
+ *         *s1 - *s2 taken at the first position where the strings differ
+ *         or where \a s1 ends.
+ */
+int
+icharncmp (ichar_t *s1, ichar_t *s2, int n)
+{
+    for ( ; n > 0; --n, ++s1, ++s2)
+    {
+        /* Stop at the end of s1 or at the first mismatch */
+        if (*s1 == 0 || *s1 != *s2)
+            return *s1 - *s2;
+    }
+    return 0;                       /* Budget of n elements used up */
+}
+
+#endif /* ICHAR_IS_CHAR */
+
+/*!
+ * Find the index of the string-character type that applies to a name.
+ *
+ * When \a searchnames is nonzero, \a name is first compared against the
+ * type names in m_chartypes. After that (or immediately, if
+ * \a searchnames is zero) the end of \a name is compared against each
+ * type's suffix list. The suffix list is walked as consecutive
+ * NUL-terminated strings, ended by an empty string.
+ *
+ * \param name Type name or file name to look up
+ * \param searchnames NZ to try an exact match on the type names first
+ * \param deformatter If not NULL, receives 1 when the matched type's
+ *                    deformatter is "tex" and 0 otherwise; left
+ *                    untouched when nothing matches
+ *
+ * \return Index of the matching type, or -1 if none matches.
+ */
+int
+ISpellChecker::findfiletype (const char *name, int searchnames, int *deformatter)
+{
+    int type;                       /* Index into type table */
+
+    /*
+     * Note: for now, the deformatter is set to 1 for tex, 0 for nroff.
+     * Further, we assume that it's one or the other, so that a test
+     * for tex is sufficient. This needs to be generalized.
+     */
+    const int namelen = strlen (name);
+
+    if (searchnames)
+    {
+        for (type = 0; type < m_hashheader.nstrchartype; type++)
+        {
+            if (strcmp (name, m_chartypes[type].name) != 0)
+                continue;
+            if (deformatter != NULL)
+                *deformatter =
+                    (strcmp (m_chartypes[type].deformatter, "tex") == 0);
+            return type;
+        }
+    }
+
+    for (type = 0; type < m_hashheader.nstrchartype; type++)
+    {
+        char *suffix = m_chartypes[type].suffixes;
+
+        while (*suffix != '\0')
+        {
+            const int suffixlen = strlen (suffix);
+
+            if (namelen >= suffixlen
+                && strcmp (&name[namelen - suffixlen], suffix) == 0)
+            {
+                if (deformatter != NULL)
+                    *deformatter =
+                        (strcmp (m_chartypes[type].deformatter, "tex") == 0);
+                return type;
+            }
+            suffix += suffixlen + 1;    /* Skip the suffix and its NUL */
+        }
+    }
+    return -1;
+}
+
+/*
+ HACK: macros replaced with function implementations
+ so we could do a side-effect-free check for unicode
+ characters which aren't in hashheader
+
+ TODO: this is just a workaround to keep us from crashing.
+ more sophisticated logic needed here.
+*/
+/* Bounds-checked lookup in m_hashheader.upperchars; 0 for values outside the table. */
+char ISpellChecker::myupper(ichar_t c)
+{
+    if (c >= (SET_SIZE + MAXSTRINGCHARS))
+        return 0;
+    return m_hashheader.upperchars[c];
+}
+
+/* Bounds-checked lookup in m_hashheader.lowerchars; 0 for values outside the table. */
+char ISpellChecker::mylower(ichar_t c)
+{
+    if (c >= (SET_SIZE + MAXSTRINGCHARS))
+        return 0;
+    return m_hashheader.lowerchars[c];
+}
+
+/* Returns 1 if c lies strictly between 0 and 0x80 and isspace() accepts it, else 0. */
+int myspace(ichar_t c)
+{
+    if (c <= 0 || c >= 0x80)
+        return 0;
+    return isspace(static_cast<unsigned char>(c)) ? 1 : 0;
+}
+
+/* Bounds-checked lookup in m_hashheader.wordchars; 0 for values outside the table. */
+char ISpellChecker::iswordch(ichar_t c)
+{
+    if (c >= (SET_SIZE + MAXSTRINGCHARS))
+        return 0;
+    return m_hashheader.wordchars[c];
+}
+
+/* Bounds-checked lookup in m_hashheader.boundarychars; 0 for values outside the table. */
+char ISpellChecker::isboundarych(ichar_t c)
+{
+    if (c >= (SET_SIZE + MAXSTRINGCHARS))
+        return 0;
+    return m_hashheader.boundarychars[c];
+}
+
+/*
+ * Bounds-checked lookup in m_hashheader.stringstarts. Only values below
+ * SET_SIZE are looked up (indexed by their unsigned char value); anything
+ * else yields 0.
+ */
+char ISpellChecker::isstringstart(ichar_t c)
+{
+    if (c >= (SET_SIZE))
+        return 0;
+    return m_hashheader.stringstarts[static_cast<unsigned char>(c)];
+}
+
+/* Maps c through m_hashheader.lowerconv; values outside the table are returned unchanged. */
+ichar_t ISpellChecker::mytolower(ichar_t c)
+{
+    if (c >= (SET_SIZE + MAXSTRINGCHARS))
+        return c;
+    return m_hashheader.lowerconv[c];
+}
+
+/* Maps c through m_hashheader.upperconv; values outside the table are returned unchanged. */
+ichar_t ISpellChecker::mytoupper (ichar_t c)
+{
+    if (c >= (SET_SIZE + MAXSTRINGCHARS))
+        return c;
+    return m_hashheader.upperconv[c];
+}
+
diff --git a/kspell2/plugins/ispell/msgs.h b/kspell2/plugins/ispell/msgs.h
new file mode 100644
index 000000000..e3f30220c
--- /dev/null
+++ b/kspell2/plugins/ispell/msgs.h
@@ -0,0 +1,329 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+/*
+ * $Id$
+ *
+ * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ * 4. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgment:
+ * This product includes software developed by Geoff Kuenning and
+ * other unpaid contributors.
+ * 5. The name of Geoff Kuenning may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Messages header file.
+ *
+ * This file contains all text strings that are written by any of the
+ * C programs in the ispell package. The strings are collected here so that
+ * you can have the option of translating them into your local language for
+ * the benefit of your users.
+ *
+ * Anyone who goes to the effort of making a translation may wish to return
+ * the translated strings to me, geoff@ITcorp.com, so that I can include
+ * them in a later distribution under #ifdef control.
+ *
+ * Besides the strings in this header file, you may also want to translate
+ * the strings in version.h, which give the version and copyright information.
+ * However, any translation of these strings MUST accurately preserve the
+ * legal rights under international law; you may wish to consult a lawyer
+ * about this since you will be responsible for the results of any
+ * incorrect translation.
+ *
+ * Most of the strings below are simple printf format strings. If the printf
+ * takes more than one parameter, the string is given as a parameterized
+ * macro in case your local language needs a different word order.
+ */
+
+/*
+ * $Log$
+ * Revision 1.1 2004/01/31 16:44:12 zrusin
+ * ISpell plugin.
+ *
+ * Revision 1.4 2003/08/14 17:51:28 dom
+ * update license - exception clause should be Lesser GPL
+ *
+ * Revision 1.3 2003/07/28 20:40:27 dom
+ * fix up the license clause, further win32-registry proof some directory getting functions
+ *
+ * Revision 1.2 2003/07/16 22:52:52 dom
+ * LGPL + exception license
+ *
+ * Revision 1.1 2003/07/15 01:15:08 dom
+ * ispell enchant backend
+ *
+ * Revision 1.1 2001/04/15 16:01:24 tomas_f
+ * moving to spell/xp
+ *
+ * Revision 1.1 1998/12/28 18:04:43 davet
+ * Spell checker code stripped from ispell. At this point, there are
+ * two external routines... the Init routine, and a check-a-word routine
+ * which returns a boolean value, and takes a 16 bit char string.
+ * The code resembles the ispell code as much as possible still.
+ *
+ * Revision 1.31 1994/12/27 23:08:57 geoff
+ * Add a message to be issued if a word contains illegal characters.
+ *
+ * Revision 1.30 1994/10/25 05:46:40 geoff
+ * Improve a couple of error messages relating to affix flags.
+ *
+ * Revision 1.29 1994/10/04 03:46:23 geoff
+ * Add a missing carriage return in the help message
+ *
+ * Revision 1.28 1994/09/16 05:07:00 geoff
+ * Add the BAD_FLAG message, and start a sentence in another message with
+ * an uppercase letter.
+ *
+ * Revision 1.27 1994/07/28 05:11:38 geoff
+ * Log message for previous revision: add BHASH_C_ZERO_COUNT.
+ *
+ * Revision 1.26 1994/07/28 04:53:49 geoff
+ *
+ * Revision 1.25 1994/05/24 04:54:36 geoff
+ * Add error messages for affix-flag checking.
+ *
+ * Revision 1.24 1994/01/25 07:12:42 geoff
+ * Get rid of all old RCS log lines in preparation for the 3.1 release.
+ *
+ */
+
+/*
+ * The following strings are used in numerous places:
+ *
+ * BAD_FLAG, CANT_OPEN and CANT_CREATE are plain printf-style format
+ * strings with one conversion each.
+ *
+ * WORD_TOO_LONG(w) is different: it expands to a format string FOLLOWED BY
+ * its three arguments (w, __LINE__, __FILE__). It must therefore be used
+ * as the complete argument list after the stream, e.g.
+ *     fprintf (stderr, WORD_TOO_LONG (word));
+ * The line and file that get printed are those of the place where the
+ * macro is used, since __LINE__ and __FILE__ are expanded there.
+ */
+#define BAD_FLAG "\r\nIllegal affix flag character '%c'\r\n"
+#define CANT_OPEN "Can't open %s\r\n"
+#define CANT_CREATE "Can't create %s\r\n"
+#define WORD_TOO_LONG(w) "\r\nWord '%s' too long at line %d of %s, truncated\r\n", \
+ w, __LINE__, __FILE__
+
+/*
+ * The following strings are used in buildhash.c:
+ */
+#define BHASH_C_NO_DICT "No dictionary (%s)\n"
+#define BHASH_C_NO_COUNT "No count file\n"
+#define BHASH_C_BAD_COUNT "Bad count file\n"
+#define BHASH_C_ZERO_COUNT "No words in dictionary\n"
+ /* I think this message looks better when it's nearly 80 characters wide,
+ * thus the ugly formatting in the next two defines. GK 9-87 */
+#define BHASH_C_BAFF_1(max, excess) \
+ " Warning: this language table may exceed the maximum total affix length\nof %d by up to %d bytes. You should either increase MAXAFFIXLEN in config.X\nor shorten your largest affix/strip string difference. (This is the\n", \
+ max, excess
+#define BHASH_C_BAFF_2 \
+ "difference between the affix length and the strip length in a given\nreplacement rule, or the affix length if there is no strip string\nin that rule.)\n"
+#define BHASH_C_OVERFLOW "Hash table overflowed by %d words\n"
+#define BHASH_C_CANT_OPEN_DICT "Can't open dictionary\n"
+#define BHASH_C_NO_SPACE "Couldn't allocate hash table\n"
+#define BHASH_C_COLLISION_SPACE "\ncouldn't allocate space for collision\n"
+#define BHASH_C_COUNTING "Counting words in dictionary ...\n"
+#define BHASH_C_WORD_COUNT "\n%d words\n"
+#define BHASH_C_USAGE "Usage: buildhash [-s] dict-file aff-file hash-file\n\tbuildhash -c count aff-file\n"
+
+/*
+ * The following strings are used in correct.c:
+ */
+#define CORR_C_HELP_1 "Whenever a word is found that is not in the dictionary,\r\n"
+#define CORR_C_HELP_2 "it is printed on the first line of the screen. If the dictionary\r\n"
+#define CORR_C_HELP_3 "contains any similar words, they are listed with a number\r\n"
+#define CORR_C_HELP_4 "next to each one. You have the option of replacing the word\r\n"
+#define CORR_C_HELP_5 "completely, or choosing one of the suggested words.\r\n"
+ /* You may add HELP_6 through HELP_9 if your language needs more lines */
+#define CORR_C_HELP_6 ""
+#define CORR_C_HELP_7 ""
+#define CORR_C_HELP_8 ""
+#define CORR_C_HELP_9 ""
+#define CORR_C_HELP_COMMANDS "\r\nCommands are:\r\n\r\n"
+#define CORR_C_HELP_R_CMD "R Replace the misspelled word completely.\r\n"
+#define CORR_C_HELP_BLANK "Space Accept the word this time only.\r\n"
+#define CORR_C_HELP_A_CMD "A Accept the word for the rest of this session.\r\n"
+#define CORR_C_HELP_I_CMD "I Accept the word, and put it in your private dictionary.\r\n"
+#define CORR_C_HELP_U_CMD "U Accept and add lowercase version to private dictionary.\r\n"
+#define CORR_C_HELP_0_CMD "0-n Replace with one of the suggested words.\r\n"
+#define CORR_C_HELP_L_CMD "L Look up words in system dictionary.\r\n"
+#define CORR_C_HELP_X_CMD "X Write the rest of this file, ignoring misspellings,\r\n and start next file.\r\n"
+#define CORR_C_HELP_Q_CMD "Q Quit immediately. Asks for confirmation.\r\n Leaves file unchanged.\r\n"
+#define CORR_C_HELP_BANG "! Shell escape.\r\n"
+#define CORR_C_HELP_REDRAW "^L Redraw screen.\r\n"
+#define CORR_C_HELP_SUSPEND "^Z Suspend program.\r\n"
+#define CORR_C_HELP_HELP "? Show this help screen.\r\n"
+#define CORR_C_HELP_TYPE_SPACE "-- Type space to continue --"
+
+#define CORR_C_FILE_LABEL " File: %s"
+#define CORR_C_READONLY "[READONLY]"
+#define CORR_C_MINI_MENU "[SP] <number> R)epl A)ccept I)nsert L)ookup U)ncap Q)uit e(X)it or ? for help\r\n"
+#define CORR_C_CONFIRM_QUIT "Are you sure you want to throw away your changes? "
+#define CORR_C_REPLACE_WITH "Replace with: "
+#define CORR_C_LOOKUP_PROMPT "Lookup string ('*' is wildcard): "
+#define CORR_C_MORE_PROMPT "-- more --"
+#define CORR_C_BLANK_MORE "\r \r"
+#define CORR_C_END_LOOK "--end--"
+
+/*
+ * The following strings are used in defmt.c:
+ */
+#define DEFMT_C_TEX_MATH_ERROR "****ERROR in parsing TeX math mode!\r\n"
+#define DEFMT_C_LR_MATH_ERROR "***ERROR in LR to math-mode switch.\n"
+
+/*
+ * The following strings are used in icombine.c:
+ */
+#define ICOMBINE_C_BAD_TYPE "icombine: unrecognized formatter type '%s'\n"
+#define ICOMBINE_C_USAGE "Usage: icombine [-T suffix] [aff-file] < wordlist\n"
+
+/*
+ * The following strings are used in ispell.c:
+ */
+#define ISPELL_C_USAGE1 "Usage: %s [-dfile | -pfile | -wchars | -Wn | -t | -n | -x | -b | -S | -B | -C | -P | -m | -Lcontext | -M | -N | -Ttype | -V] file .....\n"
+#define ISPELL_C_USAGE2 " %s [-dfile | -pfile | -wchars | -Wn | -t | -n | -Ttype] -l\n"
+#ifndef USG
+#define ISPELL_C_USAGE3 " %s [-dfile | -pfile | -ffile | -Wn | -t | -n | -s | -B | -C | -P | -m | -Ttype] {-a | -A}\n"
+#else
+#define ISPELL_C_USAGE3 " %s [-dfile | -pfile | -ffile | -Wn | -t | -n | -B | -C | -P | -m | -Ttype] {-a | -A}\n"
+#endif
+#define ISPELL_C_USAGE4 " %s [-dfile] [-wchars | -Wn] -c\n"
+#define ISPELL_C_USAGE5 " %s [-dfile] [-wchars] -e[1-4]\n"
+#define ISPELL_C_USAGE6 " %s [-dfile] [-wchars] -D\n"
+#define ISPELL_C_USAGE7 " %s -v\n"
+#define ISPELL_C_TEMP_DISAPPEARED "temporary file disappeared (%s)\r\n"
+#define ISPELL_C_BAD_TYPE "ispell: unrecognized formatter type '%s'\n"
+#define ISPELL_C_NO_FILE "ispell: specified file does not exist\n"
+#define ISPELL_C_NO_FILES "ispell: specified files do not exist\n"
+#define ISPELL_C_CANT_WRITE "Warning: Can't write to %s\r\n"
+#define ISPELL_C_OPTIONS_ARE "Compiled-in options:\n"
+
+/*
+ * The following strings are used in lookup.c:
+ */
+#define LOOKUP_C_CANT_READ "Trouble reading hash table %s\r\n"
+#define LOOKUP_C_NULL_HASH "Null hash table %s\r\n"
+#define LOOKUP_C_SHORT_HASH(name, gotten, wanted) \
+ "Truncated hash table %s: got %d bytes, expected %d\r\n", \
+ name, gotten, wanted
+#define LOOKUP_C_BAD_MAGIC(name, wanted, gotten) \
+ "Illegal format hash table %s - expected magic 0x%x, got 0x%x\r\n", \
+ name, wanted, gotten
+#define LOOKUP_C_BAD_MAGIC2(name, wanted, gotten) \
+ "Illegal format hash table %s - expected magic2 0x%x, got 0x%x\r\n", \
+ name, wanted, gotten
+#define LOOKUP_C_BAD_OPTIONS(gotopts, gotchars, gotlen, wantedopts, wantedchars, wantedlen) \
+ "Hash table options don't agree with buildhash - 0x%x/%d/%d vs. 0x%x/%d/%d\r\n", \
+ gotopts, gotchars, gotlen, \
+ wantedopts, wantedchars, wantedlen
+#define LOOKUP_C_NO_HASH_SPACE "Couldn't allocate space for hash table\r\n"
+#define LOOKUP_C_BAD_FORMAT "Illegal format hash table\r\n"
+#define LOOKUP_C_NO_LANG_SPACE "Couldn't allocate space for language tables\r\n"
+
+/*
+ * The following strings are used in makedent.c:
+ */
+#define MAKEDENT_C_NO_WORD_SPACE "\r\nCouldn't allocate space for word '%s'\r\n"
+#define MAKEDENT_C_BAD_WORD_CHAR "\r\nWord '%s' contains illegal characters\r\n"
+
+/*
+ * The following strings are used in parse.y:
+ */
+#define PARSE_Y_8_BIT "Eighth bit ignored (recompile ispell without NO8BIT)"
+#define PARSE_Y_NO_WORD_STRINGS "wordchars statement may not specify string characters"
+#define PARSE_Y_UNMATCHED "Unmatched charset lengths"
+#define PARSE_Y_NO_BOUNDARY_STRINGS "boundarychars statement may not specify string characters"
+#define PARSE_Y_LONG_STRING "String character is too long"
+#define PARSE_Y_NULL_STRING "String character must have nonzero length"
+#define PARSE_Y_MANY_STRINGS "Too many string characters"
+#define PARSE_Y_NO_SUCH_STRING "No such string character"
+#define PARSE_Y_MULTIPLE_STRINGS "Alternate string character was already defined"
+#define PARSE_Y_LENGTH_MISMATCH "Upper and lower versions of string character must be same length"
+#define PARSE_Y_WRONG_NROFF "Incorrect character count in nroffchars statement"
+#define PARSE_Y_WRONG_TEX "Incorrect character count in TeXchars statement"
+#define PARSE_Y_DOUBLE_COMPOUND "Compoundwords option may only appear once"
+#define PARSE_Y_LONG_FLAG "Flag must be single character"
+#define PARSE_Y_BAD_FLAG "Flag must be alphabetic"
+#define PARSE_Y_DUP_FLAG "Duplicate flag"
+#define PARSE_Y_NO_SPACE "Out of memory"
+#define PARSE_Y_NEED_BLANK "Single characters must be separated by a blank"
+#define PARSE_Y_MANY_CONDS "Too many conditions; 8 maximum"
+#define PARSE_Y_EOF "Unexpected EOF in quoted string"
+#define PARSE_Y_LONG_QUOTE "Quoted string too long, max 256 characters"
+#define PARSE_Y_ERROR_FORMAT(file, lineno, error) \
+ "%s line %d: %s\n", file, lineno, error
+#define PARSE_Y_MALLOC_TROUBLE "yyopen: trouble allocating memory\n"
+#define PARSE_Y_UNGRAB_PROBLEM "Internal error: ungrab buffer overflow"
+#define PARSE_Y_BAD_DEFORMATTER "Deformatter must be either 'nroff' or 'tex'"
+#define PARSE_Y_BAD_NUMBER "Illegal digit in number"
+
+/*
+ * The following strings are used in term.c:
+ */
+#define TERM_C_SMALL_SCREEN "Screen too small: need at least %d lines\n"
+#define TERM_C_NO_BATCH "Can't deal with non-interactive use yet.\n"
+#define TERM_C_CANT_FORK "Couldn't fork, try later.\r\n"
+#define TERM_C_TYPE_SPACE "\n-- Type space to continue --"
+
+/*
+ * The following strings are used in tree.c:
+ */
+#define TREE_C_CANT_UPDATE "Warning: Cannot update personal dictionary (%s)\r\n"
+#define TREE_C_NO_SPACE "Ran out of space for personal dictionary\r\n"
+#define TREE_C_TRY_ANYWAY "Continuing anyway (with reduced performance).\r\n"
+
+/*
+ * The following strings are used in unsq.c:
+ */
+#define UNSQ_C_BAD_COUNT "Illegal count character 0x%x\n"
+#define UNSQ_C_SURPRISE_EOF "Unexpected EOF\n"
diff --git a/kspell2/plugins/ispell/sp_spell.h b/kspell2/plugins/ispell/sp_spell.h
new file mode 100644
index 000000000..9e1d7b903
--- /dev/null
+++ b/kspell2/plugins/ispell/sp_spell.h
@@ -0,0 +1,60 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPELL_H
+#define SPELL_H
+
+/*
+ TODO stuff we need to do for this spell module:
+
+ eliminate all the stderr fprintfs
+ rip out the support for ICHAR_IS_CHAR
+*/
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+typedef struct _sp_suggestions { /* Passed by pointer as the "sg" argument of SpellCheckSuggestNWord16 */
+ int count; /* NOTE(review): presumably the number of entries in score[] and word[] - confirm against the implementation */
+ short *score; /* NOTE(review): presumably one score per suggestion - meaning/ordering not visible here */
+ unsigned short **word; /* NOTE(review): presumably one 16-bit-character string per suggestion - ownership not visible here */
+} sp_suggestions;
+
+int SpellCheckInit(char *hashname); /* NOTE(review): hashname presumably names the hash (dictionary) file; return convention not visible here */
+void SpellCheckCleanup(void); /* NOTE(review): presumably the counterpart of SpellCheckInit - confirm */
+int SpellCheckNWord16(const unsigned short *word16, int length); /* Takes a 16-bit-character word plus its length; return convention not visible here */
+int SpellCheckSuggestNWord16(const unsigned short *word16, int length, sp_suggestions *sg); /* Same word arguments, plus an sp_suggestions to work with; NOTE(review): presumably filled in by the callee */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPELL_H */
diff --git a/kspell2/plugins/ispell/tgood.cpp b/kspell2/plugins/ispell/tgood.cpp
new file mode 100644
index 000000000..06fbc99ef
--- /dev/null
+++ b/kspell2/plugins/ispell/tgood.cpp
@@ -0,0 +1,810 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two. You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers. If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so. If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+/*
+ * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ * 4. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgment:
+ * This product includes software developed by Geoff Kuenning and
+ * other unpaid contributors.
+ * 5. The name of Geoff Kuenning may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Table-driven version of good.c.
+ *
+ * Geoff Kuenning, July 1987
+ */
+
+/*
+ * $Log$
+ * Revision 1.1 2004/01/31 16:44:12 zrusin
+ * ISpell plugin.
+ *
+ * Revision 1.4 2003/08/14 17:51:29 dom
+ * update license - exception clause should be Lesser GPL
+ *
+ * Revision 1.3 2003/07/28 20:40:28 dom
+ * fix up the license clause, further win32-registry proof some directory getting functions
+ *
+ * Revision 1.2 2003/07/16 22:52:56 dom
+ * LGPL + exception license
+ *
+ * Revision 1.1 2003/07/15 01:15:09 dom
+ * ispell enchant backend
+ *
+ * Revision 1.2 2003/01/29 05:50:12 hippietrail
+ *
+ * Fixed my mess in EncodingManager.
+ * Changed many C casts to C++ casts.
+ *
+ * Revision 1.1 2003/01/24 05:52:36 hippietrail
+ *
+ * Refactored ispell code. Old ispell global variables had been put into
+ * an allocated structure, a pointer to which was passed to many functions.
+ * I have now made all such functions and variables private members of the
+ * ISpellChecker class. It was C OO, now it's C++ OO.
+ *
+ * I've fixed the makefiles and tested compilation but am unable to test
+ * operation. Please back out my changes if they cause problems which
+ * are not obvious or easy to fix.
+ *
+ * Revision 1.6 2003/01/06 18:48:42 dom
+ * ispell cleanup, start of using new 'add' save features
+ *
+ * Revision 1.5 2002/09/19 05:31:20 hippietrail
+ *
+ * More Ispell cleanup. Conditional globals and DEREF macros are removed.
+ * K&R function declarations removed, converted to Doxygen style comments
+ * where possible. No code has been changed (I hope). Compiles for me but
+ * unable to test.
+ *
+ * Revision 1.4 2002/09/17 03:03:31 hippietrail
+ *
+ * After seeking permission on the developer list I've reformatted all the
+ * spelling source which seemed to have parts which used 2, 3, 4, and 8
+ * spaces for tabs. It should all look good with our standard 4-space
+ * tabs now.
+ * I've concentrated just on indentation in the actual code. More prettying
+ * could be done.
+ * * NO code changes were made *
+ *
+ * Revision 1.3 2002/09/13 17:20:14 mpritchett
+ * Fix more warnings for Linux build
+ *
+ * Revision 1.2 2001/05/12 16:05:42 thomasf
+ * Big pseudo changes to ispell to make it pass around a structure rather
+ * than rely on all sorts of gloabals willy nilly here and there. Also
+ * fixed our spelling class to work with accepting suggestions once more.
+ * This code is dirty, gross and ugly (not to mention still not supporting
+ * multiple hash sized just yet) but it works on my machine and will no
+ * doubt break other machines.
+ *
+ * Revision 1.1 2001/04/15 16:01:24 tomas_f
+ * moving to spell/xp
+ *
+ * Revision 1.7 1999/10/20 06:03:56 sterwill
+ * Changed C++-style comments to C-style comments in C code.
+ *
+ * Revision 1.6 1999/10/20 03:19:35 paul
+ * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more.
+ *
+ * Revision 1.5 1999/04/13 17:12:51 jeff
+ * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
+ * Fixed crash on Win32 with the new code.
+ *
+ * Revision 1.4 1998/12/29 14:55:33 eric
+ *
+ * I've doctored the ispell code pretty extensively here. It is now
+ * warning-free on Win32. It also *works* on Win32 now, since I
+ * replaced all the I/O calls with ANSI standard ones.
+ *
+ * Revision 1.4 1998/12/29 14:55:33 eric
+ *
+ * I've doctored the ispell code pretty extensively here. It is now
+ * warning-free on Win32. It also *works* on Win32 now, since I
+ * replaced all the I/O calls with ANSI standard ones.
+ *
+ * Revision 1.3 1998/12/28 23:11:30 eric
+ *
+ * modified spell code and integration to build on Windows.
+ * This is still a hack.
+ *
+ * Actually, it doesn't yet WORK on Windows. It just builds.
+ * SpellCheckInit is failing for some reason.
+ *
+ * Revision 1.2 1998/12/28 22:16:22 eric
+ *
+ * These changes begin to incorporate the spell checker into AbiWord. Most
+ * of this is a hack.
+ *
+ * 1. added other/spell to the -I list in config/abi_defs
+ * 2. replaced other/spell/Makefile with one which is more like
+ * our build system.
+ * 3. added other/spell to other/Makefile so that the build will now
+ * dive down and build the spell check library.
+ * 4. added the AbiSpell library to the Makefiles in wp/main
+ * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
+ * This call is a HACK and should be replaced with something
+ * proper later.
+ * 6. added code to fv_View.cpp as follows:
+ * whenever you double-click on a word, the spell checker
+ * verifies that word and prints its status to stdout.
+ *
+ * Caveats:
+ * 1. This will break the Windows build. I'm going to work on fixing it
+ * now.
+ * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash.
+ * The dictionary location is currently hard-coded. This will be
+ * fixed as well.
+ *
+ * Anyway, such as it is, it works.
+ *
+ * Revision 1.1 1998/12/28 18:04:43 davet
+ * Spell checker code stripped from ispell. At this point, there are
+ * two external routines... the Init routine, and a check-a-word routine
+ * which returns a boolean value, and takes a 16 bit char string.
+ * The code resembles the ispell code as much as possible still.
+ *
+ * Revision 1.32 1994/11/02 06:56:16 geoff
+ * Remove the anyword feature, which I've decided is a bad idea.
+ *
+ * Revision 1.31 1994/10/25 05:46:25 geoff
+ * Add support for the FF_ANYWORD (affix applies to all words, even if
+ * flag bit isn't set) flag option.
+ *
+ * Revision 1.30 1994/05/24 06:23:08 geoff
+ * Don't create a hit if "allhits" is clear and capitalization
+ * mismatches. This cures a bug where a word could be in the dictionary
+ * and yet not found.
+ *
+ * Revision 1.29 1994/05/17 06:44:21 geoff
+ * Add support for controlled compound formation and the COMPOUNDONLY
+ * option to affix flags.
+ *
+ * Revision 1.28 1994/01/25 07:12:13 geoff
+ * Get rid of all old RCS log lines in preparation for the 3.1 release.
+ *
+ */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ispell_checker.h"
+
+/*!
+ * Try to explain a word through the affix tables.
+ *
+ * Prefix entries are tried first: the list attached to slot 0 of the
+ * prefix index, then the lists met while descending the index one
+ * character of ucword at a time. Unless the search stops early, the
+ * suffix tables are tried last through chk_suf().
+ *
+ * The function returns without ever reaching chk_suf() when
+ *  - a character of ucword is too large to index the tables,
+ *  - ucword runs out while the descent is still on an interior entry, or
+ *  - a hit was recorded and neither allhits nor ignoreflagbits is set.
+ *
+ * \param word Word to be checked
+ * \param ucword Upper-case-only copy of word
+ * \param len The length of word/ucword
+ * \param ignoreflagbits Ignore whether affix is legal
+ * \param allhits Keep going after first hit
+ * \param pfxopts Options to apply to prefixes
+ * \param sfxopts Options to apply to suffixes
+ */
+void ISpellChecker::chk_aff (ichar_t *word, ichar_t *ucword,
+        int len, int ignoreflagbits, int allhits, int pfxopts, int sfxopts)
+{
+    ichar_t *next;          /* Next character of ucword to index on */
+    struct flagptr *node;   /* Index entry currently being examined */
+
+    /* Entries hanging off slot 0 of the top-level index are always tried. */
+    pfx_list_chk (word, ucword, len, pfxopts, sfxopts, &m_pflagindex[0],
+        ignoreflagbits, allhits);
+
+    next = ucword;
+    /* HACK: bail on unrecognized chars */
+    if (*next >= (SET_SIZE + MAXSTRINGCHARS))
+        return;
+    node = &m_pflagindex[*next];
+    next++;
+
+    /*
+     * An entry with no affixes of its own but with a child table only
+     * points further down; keep descending until an entry that holds
+     * affixes (or has no child table) is reached.
+     */
+    while (node->numents == 0 && node->pu.fp != NULL)
+    {
+        if (*next == 0)
+            return;
+        if (node->pu.fp[0].numents)
+        {
+            pfx_list_chk (word, ucword, len, pfxopts, sfxopts,
+                &node->pu.fp[0], ignoreflagbits, allhits);
+            if (m_numhits && !allhits && /* !cflag && */ !ignoreflagbits)
+                return;
+        }
+        /* HACK: bail on unrecognized chars */
+        if (*next >= (SET_SIZE + MAXSTRINGCHARS))
+            return;
+        node = &node->pu.fp[*next];
+        next++;
+    }
+
+    pfx_list_chk (word, ucword, len, pfxopts, sfxopts, node, ignoreflagbits,
+        allhits);
+    if (m_numhits && !allhits && /* !cflag &&*/ !ignoreflagbits)
+        return;
+
+    /* No prefix entry is passed: this is a plain suffix check. */
+    chk_suf (word, ucword, len, sfxopts, static_cast<struct flagent *>(NULL),
+        ignoreflagbits, allhits);
+}
+
+/*!
+ * Check the word against one list of prefix entries.
+ *
+ * For every entry of ind whose affix matches the start of ucword, the
+ * affix is removed, the entry's strip string is put in its place, and the
+ * entry's conditions are tested against the leading characters of the
+ * result. A candidate root that passes is looked up in the dictionary.
+ * When flag bits are honoured and the root carries the entry's flag, the
+ * hit is appended to m_hits (hits beyond MAX_HITS are dropped). Entries
+ * marked FF_CROSSPRODUCT also hand the candidate root to chk_suf() so a
+ * suffix can be removed as well.
+ *
+ * When allhits is clear, a recorded hit ends the search as soon as
+ * cap_ok() accepts it; otherwise m_numhits is reset to 0 and the loop
+ * goes on.
+ *
+ * \param word Word to be checked
+ * \param ucword Upper-case-only word
+ * \param len The length of ucword
+ * \param optflags Options to apply
+ * \param sfxopts Options to apply to suffixes
+ * \param ind Flag index table
+ * \param ignoreflagbits Ignore whether affix is legal
+ * \param allhits Keep going after first hit
+ */
+void ISpellChecker::pfx_list_chk (ichar_t *word, ichar_t *ucword, int len, int optflags,
+        int sfxopts, struct flagptr * ind, int ignoreflagbits, int allhits)
+{
+    ichar_t tword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; /* Candidate root */
+    struct flagent *flent = ind->pu.ent;                /* Current table entry */
+
+    for (int entcount = ind->numents; entcount > 0; flent++, entcount--)
+    {
+        /*
+         * If this is a compound-only affix, ignore it unless we're
+         * looking for that specific thing.
+         */
+        if ((flent->flagflags & FF_COMPOUNDONLY) != 0
+            && (optflags & FF_COMPOUNDONLY) == 0)
+            continue;
+
+        /*
+         * See if the prefix matches: something must remain once it is
+         * removed, the affix text must agree, and the rebuilt root must be
+         * long enough to hold every condition position.
+         */
+        int tlen = len - flent->affl;   /* Length of the candidate root */
+        if (tlen <= 0)
+            continue;
+        if (flent->affl != 0
+            && icharncmp (flent->affix, ucword, flent->affl) != 0)
+            continue;
+        if (tlen + flent->stripl < flent->numconds)
+            continue;
+
+        /*
+         * The prefix matches. Remove it, replace it by the "strip"
+         * string (if any), and check the original conditions.
+         */
+        if (flent->stripl)
+            icharcpy (tword, flent->strip);
+        icharcpy (tword + flent->stripl, ucword + flent->affl);
+
+        ichar_t *cp = tword;
+        int cond;
+        for (cond = 0; cond < flent->numconds; cond++)
+        {
+            /* Bit `cond` of conds[c] says whether c is allowed at position cond */
+            if ((flent->conds[*cp++] & (1 << cond)) == 0)
+                break;
+        }
+        if (cond < flent->numconds)
+            continue;
+
+        /*
+         * The conditions match. See if the word is in the dictionary.
+         */
+        tlen += flent->stripl;
+
+        if (ignoreflagbits)
+        {
+            /*
+             * The original went on to format "affix+root-strip" into a
+             * second buffer here, but nothing ever read that buffer, so
+             * the formatting (and the buffer) has been removed. The
+             * lookup is kept so that any effect of calling it stays as
+             * before; no hit is recorded in this mode.
+             */
+            (void) ispell_lookup (tword, 1);
+        }
+        else
+        {
+            struct dent *dent = ispell_lookup (tword, 1);
+
+            if (dent != NULL && TSTMASKBIT (dent->mask, flent->flagbit))
+            {
+                if (m_numhits < MAX_HITS)
+                {
+                    m_hits[m_numhits].dictent = dent;
+                    m_hits[m_numhits].prefix = flent;
+                    m_hits[m_numhits].suffix = NULL;
+                    m_numhits++;
+                }
+                if (!allhits)
+                {
+#ifndef NO_CAPITALIZATION_SUPPORT
+                    if (cap_ok (word, &m_hits[0], len))
+                        return;
+                    /* Capitalization mismatch: discard the hit, keep looking */
+                    m_numhits = 0;
+#else /* NO_CAPITALIZATION_SUPPORT */
+                    return;
+#endif /* NO_CAPITALIZATION_SUPPORT */
+                }
+            }
+        }
+
+        /*
+         * Handle cross-products.
+         */
+        if (flent->flagflags & FF_CROSSPRODUCT)
+            chk_suf (word, tword, tlen, sfxopts | FF_CROSSPRODUCT,
+                flent, ignoreflagbits, allhits);
+    }
+}
+
+/*!
+ * Check possible suffixes
+ *
+ * The list attached to slot 0 of the suffix index is tried first. The
+ * index is then descended one character at a time starting from the LAST
+ * character of ucword and moving towards its start, trying every list met
+ * on the way.
+ *
+ * The descent stops early when ucword is exhausted, when a character is
+ * too large to index the tables, or when a hit was recorded while neither
+ * allhits nor ignoreflagbits is set.
+ *
+ * \param word Word to be checked
+ * \param ucword Upper-case-only word
+ * \param len The length of ucword
+ * \param optflags Affix option flags
+ * \param pfxent Prefix flag entry if cross-prod
+ * \param ignoreflagbits Ignore whether affix is legal
+ * \param allhits Keep going after first hit
+ */
+void
+ISpellChecker::chk_suf (ichar_t *word, ichar_t *ucword,
+        int len, int optflags, struct flagent *pfxent,
+        int ignoreflagbits, int allhits)
+{
+    ichar_t *cp;            /* Pointer to char to index on */
+    struct flagptr *ind;    /* Flag index table to test */
+
+    suf_list_chk (word, ucword, len, &m_sflagindex[0], optflags, pfxent,
+        ignoreflagbits, allhits);
+
+    /*
+     * With no characters there is no last character to index on; without
+     * this guard the line below would read ucword[-1].
+     */
+    if (len <= 0)
+        return;
+
+    cp = ucword + len - 1;
+    /* HACK: bail on unrecognized chars */
+    if (*cp >= (SET_SIZE + MAXSTRINGCHARS))
+        return;
+    ind = &m_sflagindex[*cp];
+    while (ind->numents == 0 && ind->pu.fp != NULL)
+    {
+        /* Already on the first character: nothing is left to descend on */
+        if (cp == ucword)
+            return;
+        if (ind->pu.fp[0].numents)
+        {
+            suf_list_chk (word, ucword, len, &ind->pu.fp[0],
+                optflags, pfxent, ignoreflagbits, allhits);
+            if (m_numhits != 0 && !allhits && /* !cflag && */ !ignoreflagbits)
+                return;
+        }
+        /* HACK: bail on unrecognized chars */
+        if (*(cp - 1) >= (SET_SIZE + MAXSTRINGCHARS))
+            return;
+        cp--;
+        ind = &ind->pu.fp[*cp];
+    }
+    suf_list_chk (word, ucword, len, ind, optflags, pfxent,
+        ignoreflagbits, allhits);
+}
+
+/*!
+ * Check the word against one list of suffix entries.
+ *
+ * For every entry of ind whose affix matches the end of ucword, the affix
+ * is removed, the entry's strip string is appended in its place, and the
+ * entry's conditions are tested against the trailing characters of the
+ * result. A candidate root that passes is looked up in the dictionary.
+ * When flag bits are honoured, a hit is appended to m_hits if the root
+ * carries the entry's flag and, for a cross-product check, the prefix
+ * entry's flag too (hits beyond MAX_HITS are dropped).
+ *
+ * When allhits is clear, a recorded hit ends the search as soon as
+ * cap_ok() accepts it; otherwise m_numhits is reset to 0 and the loop
+ * goes on.
+ *
+ * \param word Word to be checked
+ * \param ucword Upper-case-only word
+ * \param len The length of ucword
+ * \param ind Flag index table
+ * \param optflags Affix option flags
+ * \param pfxent Prefix flag entry if crossonly
+ * \param ignoreflagbits Ignore whether affix is legal
+ * \param allhits Keep going after first hit
+ */
+void ISpellChecker::suf_list_chk (ichar_t *word, ichar_t *ucword,
+        int len, struct flagptr *ind, int optflags,
+        struct flagent *pfxent, int ignoreflagbits, int allhits)
+{
+    ichar_t tword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; /* Candidate root */
+    struct flagent *flent = ind->pu.ent;                /* Current table entry */
+
+    /*
+     * tword is refilled from ucword for every matching entry below, so the
+     * unconditional copy the original made before the loop is not needed.
+     */
+    for (int entcount = ind->numents; entcount > 0; flent++, entcount--)
+    {
+        /* In a cross-product check only cross-product suffixes qualify */
+        if ((optflags & FF_CROSSPRODUCT) != 0
+            && (flent->flagflags & FF_CROSSPRODUCT) == 0)
+            continue;
+        /*
+         * If this is a compound-only affix, ignore it unless we're
+         * looking for that specific thing.
+         */
+        if ((flent->flagflags & FF_COMPOUNDONLY) != 0
+            && (optflags & FF_COMPOUNDONLY) == 0)
+            continue;
+
+        /*
+         * See if the suffix matches: something must remain once it is
+         * removed, the tail of ucword must equal the affix, and the
+         * rebuilt root must be long enough for every condition position.
+         */
+        int tlen = len - flent->affl;   /* Length of the candidate root */
+        if (tlen <= 0)
+            continue;
+        if (flent->affl != 0
+            && icharcmp (flent->affix, ucword + tlen) != 0)
+            continue;
+        if (tlen + flent->stripl < flent->numconds)
+            continue;
+
+        /*
+         * The suffix matches. Remove it, replace it by the "strip"
+         * string (if any), and check the original conditions.
+         */
+        icharcpy (tword, ucword);
+        ichar_t *cp = tword + tlen;
+        if (flent->stripl)
+        {
+            icharcpy (cp, flent->strip);
+            tlen += flent->stripl;
+            cp = tword + tlen;
+        }
+        else
+            *cp = '\0';
+
+        /* Conditions are matched backwards from the end of the root */
+        int cond;
+        for (cond = flent->numconds; --cond >= 0; )
+        {
+            if ((flent->conds[*--cp] & (1 << cond)) == 0)
+                break;
+        }
+        if (cond >= 0)
+            continue;
+
+        /*
+         * The conditions match. See if the word is in the dictionary.
+         */
+        if (ignoreflagbits)
+        {
+            /*
+             * The original went on to format "prefix+root-strip+affix"
+             * into a second buffer here, but nothing ever read that
+             * buffer, so the formatting (and the buffer) has been
+             * removed. The lookup is kept so that any effect of calling
+             * it stays as before; no hit is recorded in this mode.
+             */
+            (void) ispell_lookup (tword, 1);
+        }
+        else
+        {
+            struct dent *dent = ispell_lookup (tword, 1);
+
+            if (dent != NULL
+                && TSTMASKBIT (dent->mask, flent->flagbit)
+                && ((optflags & FF_CROSSPRODUCT) == 0
+                    || TSTMASKBIT (dent->mask, pfxent->flagbit)))
+            {
+                if (m_numhits < MAX_HITS)
+                {
+                    m_hits[m_numhits].dictent = dent;
+                    m_hits[m_numhits].prefix = pfxent;
+                    m_hits[m_numhits].suffix = flent;
+                    m_numhits++;
+                }
+                if (!allhits)
+                {
+#ifndef NO_CAPITALIZATION_SUPPORT
+                    if (cap_ok (word, &m_hits[0], len))
+                        return;
+                    /* Capitalization mismatch: discard the hit, keep looking */
+                    m_numhits = 0;
+#else /* NO_CAPITALIZATION_SUPPORT */
+                    return;
+#endif /* NO_CAPITALIZATION_SUPPORT */
+                }
+            }
+        }
+    }
+}
+
+/*!
+ * Expand a dictionary prefix entry
+ *
+ * Walks the prefix flag list (m_pflaglist, m_numpflags entries) and calls
+ * pr_pre_expansion() for every entry whose flag bit is set in mask.
+ *
+ * \param croot Char version of rootword
+ * \param rootword Root word to expand
+ * \param mask Mask bits to expand on
+ * \param option Option, see expandmode
+ * \param extra Extra info to add to line
+ *
+ * \return Sum of the values returned by pr_pre_expansion()
+ */
+int ISpellChecker::expand_pre (char *croot, ichar_t *rootword, MASKTYPE mask[],
+        int option, char *extra)
+{
+    int total = 0;                          /* Accumulated expansion length */
+    int remaining = m_numpflags;            /* Entries still to visit */
+    struct flagent *entry = m_pflaglist;    /* Entry being considered */
+
+    while (remaining > 0)
+    {
+        if (TSTMASKBIT (mask, entry->flagbit))
+        {
+            total += pr_pre_expansion (croot, rootword, entry, mask,
+                option, extra);
+        }
+        entry++;
+        remaining--;
+    }
+    return total;
+}
+
+/*!
+ * Print a prefix expansion
+ *
+ * Applies one prefix entry to rootword: the first stripl characters of
+ * the root are dropped, the entry's affix is put in front, and the case
+ * of the result is adjusted to follow the root. Nothing is produced when
+ * the root is shorter than the number of conditions, when stripping would
+ * leave nothing, or when a condition fails.
+ *
+ * Output goes to stdout: with option 3 a newline and croot are printed
+ * first, and unless option is 4 the expanded word followed by extra is
+ * printed. If the entry allows cross-products, expand_suf() is then run
+ * on the expanded word.
+ *
+ * \param croot Char version of rootword
+ * \param rootword Root word to expand
+ * \param flent Current table entry
+ * \param mask Mask bits to expand on
+ * \param option Option, see expandmode
+ * \param extra Extra info to add to line
+ *
+ * \return 0 if the entry does not apply; otherwise the length of the
+ *         expanded word, plus whatever expand_suf() returns for
+ *         cross-product entries
+ */
+int ISpellChecker::pr_pre_expansion ( char *croot, ichar_t *rootword,
+        struct flagent *flent, MASKTYPE mask[], int option,
+        char *extra)
+{
+    int cond;                                   /* Current condition number */
+    ichar_t *nextc;                             /* Next case choice */
+    int tlen;                                   /* Length of tword */
+    ichar_t tword[INPUTWORDLEN + MAXAFFIXLEN];  /* Temp */
+
+    tlen = icharlen (rootword);
+    if (flent->numconds > tlen)
+        return 0;
+    tlen -= flent->stripl;
+    if (tlen <= 0)
+        return 0;
+    tlen += flent->affl;
+    /*
+     * The pointer is advanced in the loop header rather than inside the
+     * mytoupper() argument, so the step happens exactly once per
+     * condition even if mytoupper evaluates its argument more than once.
+     */
+    for (cond = 0, nextc = rootword; cond < flent->numconds; cond++, nextc++)
+    {
+        if ((flent->conds[mytoupper (*nextc)] & (1 << cond)) == 0)
+            return 0;
+    }
+    /*
+     * The conditions are satisfied. Copy the word, add the prefix,
+     * and make it the proper case. This code is carefully written
+     * to match that ins_cap and cap_ok. Note that the affix, as
+     * inserted, is uppercase.
+     *
+     * There is a tricky bit here: if the root is capitalized, we
+     * want a capitalized result. If the root is followcase, however,
+     * we want to duplicate the case of the first remaining letter
+     * of the root. In other words, "Loved/U" should generate "Unloved",
+     * but "LOved/U" should generate "UNLOved" and "lOved/U" should
+     * produce "unlOved".
+     */
+    if (flent->affl)
+        icharcpy (tword, flent->affix);
+    /*
+     * Always aim at tword. Previously this was only done when the affix
+     * was non-empty; with an empty affix nextc still pointed into
+     * rootword, so the copy below overwrote the caller's root and left
+     * tword uninitialised.
+     */
+    nextc = tword + flent->affl;
+    icharcpy (nextc, rootword + flent->stripl);
+    if (myupper (rootword[0]))
+    {
+        /* We must distinguish followcase from capitalized and all-upper */
+        for (nextc = rootword + 1; *nextc; nextc++)
+        {
+            if (!myupper (*nextc))
+                break;
+        }
+        if (*nextc)
+        {
+            /* It's a followcase or capitalized word. Figure out which. */
+            for ( ; *nextc; nextc++)
+            {
+                if (myupper (*nextc))
+                    break;
+            }
+            if (*nextc)
+            {
+                /* It's followcase. */
+                if (!myupper (tword[flent->affl]))
+                    forcelc (tword, flent->affl);
+            }
+            else
+            {
+                /* It's capitalized */
+                forcelc (tword + 1, tlen - 1);
+            }
+        }
+    }
+    else
+    {
+        /*
+         * Followcase or all-lower, we don't care which. nextc still
+         * points at the first root letter inside tword.
+         */
+        if (!myupper (*nextc))
+            forcelc (tword, flent->affl);
+    }
+    if (option == 3)
+        printf ("\n%s", croot);
+    if (option != 4)
+        printf (" %s%s", ichartosstr (tword, 1), extra);
+    if (flent->flagflags & FF_CROSSPRODUCT)
+        return tlen
+            + expand_suf (croot, tword, mask, FF_CROSSPRODUCT, option, extra);
+    else
+        return tlen;
+}
+
+/*!
+ * Expand a dictionary suffix entry
+ *
+ * Walks the suffix flag list (m_sflaglist, m_numsflags entries) and calls
+ * pr_suf_expansion() for every entry whose flag bit is set in mask. When
+ * optflags contains FF_CROSSPRODUCT, entries that are not themselves
+ * marked FF_CROSSPRODUCT are skipped.
+ *
+ * \param croot Char version of rootword
+ * \param rootword Root word to expand
+ * \param mask Mask bits to expand on
+ * \param optflags Affix option flags
+ * \param option Option, see expandmode
+ * \param extra Extra info to add to line
+ *
+ * \return Sum of the values returned by pr_suf_expansion()
+ */
+int ISpellChecker::expand_suf (char *croot, ichar_t *rootword, MASKTYPE mask[],
+        int optflags, int option, char *extra)
+{
+    int total = 0;                          /* Accumulated expansion length */
+    int remaining = m_numsflags;            /* Entries still to visit */
+    struct flagent *entry = m_sflaglist;    /* Entry being considered */
+
+    while (remaining > 0)
+    {
+        /* Usable unless a cross-product is wanted and the entry forbids it */
+        const bool usable = (optflags & FF_CROSSPRODUCT) == 0
+            || (entry->flagflags & FF_CROSSPRODUCT) != 0;
+
+        if (usable && TSTMASKBIT (mask, entry->flagbit))
+        {
+            total += pr_suf_expansion (croot, rootword, entry, option,
+                extra);
+        }
+        entry++;
+        remaining--;
+    }
+    return total;
+}
+
+/*!
+ * Print a suffix expansion
+ *
+ * Applies one suffix entry to rootword: the last stripl characters of the
+ * root are dropped and the entry's affix is appended, lower-cased when the
+ * last remaining root character is not upper case. Nothing is produced
+ * when the root is shorter than the number of conditions, when stripping
+ * would leave nothing, or when a condition fails.
+ *
+ * Output goes to stdout: with option 3 a newline and croot are printed
+ * first, and unless option is 4 the expanded word followed by extra is
+ * printed.
+ *
+ * \param croot Char version of rootword
+ * \param rootword Root word to expand
+ * \param flent Current table entry
+ * \param option Option, see expandmode
+ * \param extra Extra info to add to line
+ *
+ * \return 0 if the entry does not apply, otherwise the length of the
+ *         expanded word
+ */
+int ISpellChecker::pr_suf_expansion (char *croot, ichar_t *rootword,
+        struct flagent *flent, int option, char *extra)
+{
+    int cond;                                   /* Current condition number */
+    ichar_t *nextc;                             /* Next case choice */
+    int tlen;                                   /* Length of rootword */
+    ichar_t tword[INPUTWORDLEN + MAXAFFIXLEN];  /* Temp */
+
+    tlen = icharlen (rootword);
+    cond = flent->numconds;
+    if (cond > tlen)
+        return 0;
+    if (tlen - flent->stripl <= 0)
+        return 0;
+    /*
+     * Conditions are matched backwards from the end of the root. The
+     * pointer is stepped on its own line rather than inside the
+     * mytoupper() argument, so the step happens exactly once per
+     * condition even if mytoupper evaluates its argument more than once.
+     */
+    for (nextc = rootword + tlen; --cond >= 0; )
+    {
+        nextc--;
+        if ((flent->conds[mytoupper (*nextc)] & (1 << cond)) == 0)
+            return 0;
+    }
+    /*
+     * The conditions are satisfied. Copy the word, add the suffix,
+     * and make it match the case of the last remaining character of the
+     * root. Again, this code carefully matches ins_cap and cap_ok.
+     */
+    icharcpy (tword, rootword);
+    nextc = tword + tlen - flent->stripl;
+    if (flent->affl)
+    {
+        icharcpy (nextc, flent->affix);
+        /* nextc[-1] exists: at least one root character remains (checked above) */
+        if (!myupper (nextc[-1]))
+            forcelc (nextc, flent->affl);
+    }
+    else
+        *nextc = 0;
+    if (option == 3)
+        printf ("\n%s", croot);
+    if (option != 4)
+        printf (" %s%s", ichartosstr (tword, 1), extra);
+    return tlen + flent->affl - flent->stripl;
+}
+
+/*!
+ * Force to lowercase: replaces each of the first len characters of dst
+ * with its mytolower() value, in place. A len of zero or less changes
+ * nothing.
+ *
+ * \param dst Destination to modify
+ * \param len Number of characters to convert
+ */
+void ISpellChecker::forcelc (ichar_t *dst, int len)
+{
+    for (int i = 0; i < len; i++)
+    {
+        dst[i] = mytolower (dst[i]);
+    }
+}