diff options
Diffstat (limited to 'tdespell2/plugins/ispell')
-rw-r--r-- | tdespell2/plugins/ispell/CMakeLists.txt | 44 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/Makefile.am | 24 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/correct.cpp | 949 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/good.cpp | 431 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/hash.cpp | 204 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/ispell.h | 801 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/ispell_checker.cpp | 505 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/ispell_checker.h | 273 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/ispell_def.h | 34 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/lookup.cpp | 764 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/makedent.cpp | 972 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/msgs.h | 329 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/sp_spell.h | 60 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/tdespell_ispell.desktop | 22 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/tdespell_ispellclient.cpp | 54 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/tdespell_ispellclient.h | 56 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/tdespell_ispelldict.cpp | 76 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/tdespell_ispelldict.h | 49 | ||||
-rw-r--r-- | tdespell2/plugins/ispell/tgood.cpp | 810 |
19 files changed, 6457 insertions, 0 deletions
diff --git a/tdespell2/plugins/ispell/CMakeLists.txt b/tdespell2/plugins/ispell/CMakeLists.txt new file mode 100644 index 000000000..c8e5a4ec6 --- /dev/null +++ b/tdespell2/plugins/ispell/CMakeLists.txt @@ -0,0 +1,44 @@ +################################################# +# +# (C) 2010 Serghei Amelian +# serghei (DOT) amelian (AT) gmail.com +# +# Improvements and feedback are welcome +# +# This file is released under GPL >= 2 +# +################################################# + +include_directories( + ${TQT_INCLUDE_DIRS} + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_BINARY_DIR}/tdecore + ${CMAKE_SOURCE_DIR}/tdespell2 + ${CMAKE_SOURCE_DIR}/tdecore +) + +link_directories( + ${TQT_LIBRARY_DIRS} +) + + +##### other data ################################ + +install( FILES tdespell_ispell.desktop DESTINATION ${SERVICES_INSTALL_DIR} ) + + +#### tdespell_ispell ############################## + +set( target tdespell_ispell ) + +set( ${target}_SRCS + tdespell_ispellclient.cpp tdespell_ispelldict.cpp + correct.cpp good.cpp hash.cpp lookup.cpp makedent.cpp + tgood.cpp ispell_checker.cpp +) + +tde_add_kpart( ${target} AUTOMOC + SOURCES ${${target}_SRCS} + LINK tdespell2-shared + DESTINATION ${PLUGIN_INSTALL_DIR} +) diff --git a/tdespell2/plugins/ispell/Makefile.am b/tdespell2/plugins/ispell/Makefile.am new file mode 100644 index 000000000..15dffda28 --- /dev/null +++ b/tdespell2/plugins/ispell/Makefile.am @@ -0,0 +1,24 @@ +METASOURCES = AUTO + +AM_CPPFLAGS = -I$(top_srcdir)/tdespell2 -I$(top_srcdir) $(all_includes) + +# For the future: examine if condensing the tons of *_LDFLAGS variables +# into $(all_libraries) isn't better +AM_LDFLAGS = $(LDFLAGS_AS_NEEDED) $(LDFLAGS_NEW_DTAGS) + +kde_module_LTLIBRARIES = tdespell_ispell.la + +tdespell_ispell_la_SOURCES = tdespell_ispellclient.cpp tdespell_ispelldict.cpp \ + correct.cpp \ + good.cpp \ + hash.cpp \ + lookup.cpp \ + makedent.cpp \ + tgood.cpp \ + ispell_checker.cpp + +tdespell_ispell_la_LDFLAGS = -module -no-undefined $(KDE_PLUGIN) 
+tdespell_ispell_la_LIBADD = ../../ui/libtdespell2.la $(LIB_QT) $(LIB_TDECORE) + +service_DATA = tdespell_ispell.desktop +servicedir = $(kde_servicesdir) diff --git a/tdespell2/plugins/ispell/correct.cpp b/tdespell2/plugins/ispell/correct.cpp new file mode 100644 index 000000000..65e98fa6d --- /dev/null +++ b/tdespell2/plugins/ispell/correct.cpp @@ -0,0 +1,949 @@ +/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* enchant + * Copyright (C) 2003 Dom Lachowicz + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. 
+ */ + +/* + * correct.c - Routines to manage the higher-level aspects of spell-checking + * + * This code originally resided in ispell.c, but was moved here to keep + * file sizes smaller. + * + * Copyright (c), 1983, by Pace Willisson + * + * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * 4. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Geoff Kuenning and + * other unpaid contributors. + * 5. The name of Geoff Kuenning may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $Log$ + * Revision 1.1 2004/01/31 16:44:12 zrusin + * ISpell plugin. + * + * Revision 1.4 2003/08/14 17:51:26 dom + * update license - exception clause should be Lesser GPL + * + * Revision 1.3 2003/07/28 20:40:25 dom + * fix up the license clause, further win32-registry proof some directory getting functions + * + * Revision 1.2 2003/07/16 22:52:35 dom + * LGPL + exception license + * + * Revision 1.1 2003/07/15 01:15:04 dom + * ispell enchant backend + * + * Revision 1.2 2003/01/29 05:50:11 hippietrail + * + * Fixed my mess in EncodingManager. + * Changed many C casts to C++ casts. + * + * Revision 1.1 2003/01/24 05:52:31 hippietrail + * + * Refactored ispell code. Old ispell global variables had been put into + * an allocated structure, a pointer to which was passed to many functions. + * I have now made all such functions and variables private members of the + * ISpellChecker class. It was C OO, now it's C++ OO. + * + * I've fixed the makefiles and tested compilation but am unable to test + * operation. Please back out my changes if they cause problems which + * are not obvious or easy to fix. + * + * Revision 1.7 2002/09/19 05:31:15 hippietrail + * + * More Ispell cleanup. Conditional globals and DEREF macros are removed. + * K&R function declarations removed, converted to Doxygen style comments + * where possible. No code has been changed (I hope). Compiles for me but + * unable to test. 
+ * + * Revision 1.6 2002/09/17 03:03:28 hippietrail + * + * After seeking permission on the developer list I've reformatted all the + * spelling source which seemed to have parts which used 2, 3, 4, and 8 + * spaces for tabs. It should all look good with our standard 4-space + * tabs now. + * I've concentrated just on indentation in the actual code. More prettying + * could be done. + * * NO code changes were made * + * + * Revision 1.5 2002/09/13 17:20:12 mpritchett + * Fix more warnings for Linux build + * + * Revision 1.4 2002/03/06 08:27:16 fjfranklin + * o Only activate compound handling when the hash file says so (Per Larsson) + * + * Revision 1.3 2001/05/14 09:52:50 hub + * Removed newMain.c from GNUmakefile.am + * + * C++ comments are not C comment. Changed to C comments + * + * Revision 1.2 2001/05/12 16:05:42 thomasf + * Big pseudo changes to ispell to make it pass around a structure rather + * than rely on all sorts of gloabals willy nilly here and there. Also + * fixed our spelling class to work with accepting suggestions once more. + * This code is dirty, gross and ugly (not to mention still not supporting + * multiple hash sized just yet) but it works on my machine and will no + * doubt break other machines. + * + * Revision 1.1 2001/04/15 16:01:24 tomas_f + * moving to spell/xp + * + * Revision 1.2 1999/10/05 16:17:28 paul + * Fixed build, and other tidyness. + * Spell dialog enabled by default, with keyboard binding of F7. + * + * Revision 1.1 1999/09/29 23:33:32 justin + * Updates to the underlying ispell-based code to support suggested corrections. + * + * Revision 1.59 1995/08/05 23:19:43 geoff + * Fix a bug that caused offsets for long lines to be confused if the + * line started with a quoting uparrow. + * + * Revision 1.58 1994/11/02 06:56:00 geoff + * Remove the anyword feature, which I've decided is a bad idea. 
+ * + * Revision 1.57 1994/10/26 05:12:39 geoff + * Try boundary characters when inserting or substituting letters, except + * (naturally) at word boundaries. + * + * Revision 1.56 1994/10/25 05:46:30 geoff + * Fix an assignment inside a conditional that could generate spurious + * warnings (as well as being bad style). Add support for the FF_ANYWORD + * option. + * + * Revision 1.55 1994/09/16 04:48:24 geoff + * Don't pass newlines from the input to various other routines, and + * don't assume that those routines leave the input unchanged. + * + * Revision 1.54 1994/09/01 06:06:41 geoff + * Change erasechar/killchar to uerasechar/ukillchar to avoid + * shared-library problems on HP systems. + * + * Revision 1.53 1994/08/31 05:58:38 geoff + * Add code to handle extremely long lines in -a mode without splitting + * words or reporting incorrect offsets. + * + * Revision 1.52 1994/05/25 04:29:24 geoff + * Fix a bug that caused line widths to be calculated incorrectly when + * displaying lines containing tabs. Fix a couple of places where + * characters were sign-extended incorrectly, which could cause 8-bit + * characters to be displayed wrong. + * + * Revision 1.51 1994/05/17 06:44:05 geoff + * Add support for controlled compound formation and the COMPOUNDONLY + * option to affix flags. + * + * Revision 1.50 1994/04/27 05:20:14 geoff + * Allow compound words to be formed from more than two components + * + * Revision 1.49 1994/04/27 01:50:31 geoff + * Add support to correctly capitalize words generated as a result of a + * missing-space suggestion. + * + * Revision 1.48 1994/04/03 23:23:02 geoff + * Clean up the code in missingspace() to be a bit simpler and more + * efficient. + * + * Revision 1.47 1994/03/15 06:24:23 geoff + * Fix the +/-/~ commands to be independent. Allow the + command to + * receive a suffix which is a deformatter type (currently hardwired to + * be either tex or nroff/troff). 
+ * + * Revision 1.46 1994/02/21 00:20:03 geoff + * Fix some bugs that could cause bad displays in the interaction between + * TeX parsing and string characters. Show_char now will not overrun + * the inverse-video display area by accident. + * + * Revision 1.45 1994/02/14 00:34:51 geoff + * Fix correct to accept length parameters for ctok and itok, so that it + * can pass them to the to/from ichar routines. + * + * Revision 1.44 1994/01/25 07:11:22 geoff + * Get rid of all old RCS log lines in preparation for the 3.1 release. + * + */ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "ispell_checker.h" +#include "msgs.h" + +/* +extern void upcase P ((ichar_t * string)); +extern void lowcase P ((ichar_t * string)); +extern ichar_t * strtosichar P ((char * in, int canonical)); + +int compoundflag = COMPOUND_CONTROLLED; +*/ + +/* + * \param a + * \param b + * \param canonical NZ for canonical string chars + * + * \return + */ +int +ISpellChecker::casecmp (char *a, char *b, int canonical) +{ + register ichar_t * ap; + register ichar_t * bp; + ichar_t inta[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; + ichar_t intb[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; + + strtoichar (inta, a, sizeof inta, canonical); + strtoichar (intb, b, sizeof intb, canonical); + for (ap = inta, bp = intb; *ap != 0; ap++, bp++) + { + if (*ap != *bp) + { + if (*bp == '\0') + return m_hashheader.sortorder[*ap]; + else if (mylower (*ap)) + { + if (mylower (*bp) || mytoupper (*ap) != *bp) + return static_cast<int>(m_hashheader.sortorder[*ap]) + - static_cast<int>(m_hashheader.sortorder[*bp]); + } + else + { + if (myupper (*bp) || mytolower (*ap) != *bp) + return static_cast<int>(m_hashheader.sortorder[*ap]) + - static_cast<int>(m_hashheader.sortorder[*bp]); + } + } + } + if (*bp != '\0') + return -static_cast<int>(m_hashheader.sortorder[*bp]); + for (ap = inta, bp = intb; *ap; ap++, bp++) + { + if (*ap != *bp) + { + return static_cast<int>(m_hashheader.sortorder[*ap]) + - 
static_cast<int>(m_hashheader.sortorder[*bp]); + } + } + return 0; +} + +/* + * \param word + */ +void +ISpellChecker::makepossibilities (ichar_t *word) +{ + register int i; + + for (i = 0; i < MAXPOSSIBLE; i++) + m_possibilities[i][0] = 0; + m_pcount = 0; + m_maxposslen = 0; + m_easypossibilities = 0; + +#ifndef NO_CAPITALIZATION_SUPPORT + wrongcapital (word); +#endif + +/* + * according to Pollock and Zamora, CACM April 1984 (V. 27, No. 4), + * page 363, the correct order for this is: + * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION + * thus, it was exactly backwards in the old version. -- PWP + */ + + if (m_pcount < MAXPOSSIBLE) + missingletter (word); /* omission */ + if (m_pcount < MAXPOSSIBLE) + transposedletter (word); /* transposition */ + if (m_pcount < MAXPOSSIBLE) + extraletter (word); /* insertion */ + if (m_pcount < MAXPOSSIBLE) + wrongletter (word); /* substitution */ + + if ((m_hashheader.compoundflag != COMPOUND_ANYTIME) && + m_pcount < MAXPOSSIBLE) + missingspace (word); /* two words */ + +} + +/* + * \param word + * + * \return + */ +int +ISpellChecker::insert (ichar_t *word) +{ + register int i; + register char * realword; + + realword = ichartosstr (word, 0); + for (i = 0; i < m_pcount; i++) + { + if (strcmp (m_possibilities[i], realword) == 0) + return (0); + } + + strcpy (m_possibilities[m_pcount++], realword); + i = strlen (realword); + if (i > m_maxposslen) + m_maxposslen = i; + if (m_pcount >= MAXPOSSIBLE) + return (-1); + else + return (0); +} + +#ifndef NO_CAPITALIZATION_SUPPORT +/* + * \param word + */ +void +ISpellChecker::wrongcapital (ichar_t *word) +{ + ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN]; + + /* + ** When the third parameter to "good" is nonzero, it ignores + ** case. If the word matches this way, "ins_cap" will recapitalize + ** it correctly. 
+ */ + if (good (word, 0, 1, 0, 0)) + { + icharcpy (newword, word); + upcase (newword); + ins_cap (newword, word); + } +} +#endif + +/* + * \param word + */ +void +ISpellChecker::wrongletter (ichar_t *word) +{ + register int i; + register int j; + register int n; + ichar_t savechar; + ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN]; + + n = icharlen (word); + icharcpy (newword, word); +#ifndef NO_CAPITALIZATION_SUPPORT + upcase (newword); +#endif + + for (i = 0; i < n; i++) + { + savechar = newword[i]; + for (j=0; j < m_Trynum; ++j) + { + if (m_Try[j] == savechar) + continue; + else if (isboundarych (m_Try[j]) && (i == 0 || i == n - 1)) + continue; + newword[i] = m_Try[j]; + if (good (newword, 0, 1, 0, 0)) + { + if (ins_cap (newword, word) < 0) + return; + } + } + newword[i] = savechar; + } +} + +/* + * \param word + */ +void +ISpellChecker::extraletter (ichar_t *word) +{ + ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN]; + register ichar_t * p; + register ichar_t * r; + + if (icharlen (word) < 2) + return; + + icharcpy (newword, word + 1); + for (p = word, r = newword; *p != 0; ) + { + if (good (newword, 0, 1, 0, 0)) + { + if (ins_cap (newword, word) < 0) + return; + } + *r++ = *p++; + } +} + +/* + * \param word + */ +void +ISpellChecker::missingletter (ichar_t *word) +{ + ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN + 1]; + register ichar_t * p; + register ichar_t * r; + register int i; + + icharcpy (newword + 1, word); + for (p = word, r = newword; *p != 0; ) + { + for (i = 0; i < m_Trynum; i++) + { + if (isboundarych (m_Try[i]) && r == newword) + continue; + *r = m_Try[i]; + if (good (newword, 0, 1, 0, 0)) + { + if (ins_cap (newword, word) < 0) + return; + } + } + *r++ = *p++; + } + for (i = 0; i < m_Trynum; i++) + { + if (isboundarych (m_Try[i])) + continue; + *r = m_Try[i]; + if (good (newword, 0, 1, 0, 0)) + { + if (ins_cap (newword, word) < 0) + return; + } + } +} + +/* + * \param word + */ +void ISpellChecker::missingspace (ichar_t *word) +{ + ichar_t 
firsthalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]; + int firstno; /* Index into first */ + ichar_t * firstp; /* Ptr into current firsthalf word */ + ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN + 1]; + int nfirsthalf; /* No. words saved in 1st half */ + int nsecondhalf; /* No. words saved in 2nd half */ + register ichar_t * p; + ichar_t secondhalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]; + int secondno; /* Index into second */ + + /* + ** We don't do words of length less than 3; this keeps us from + ** splitting all two-letter words into two single letters. We + ** also don't do maximum-length words, since adding the space + ** would exceed the size of the "possibilities" array. + */ + nfirsthalf = icharlen (word); + if (nfirsthalf < 3 || nfirsthalf >= INPUTWORDLEN + MAXAFFIXLEN - 1) + return; + icharcpy (newword + 1, word); + for (p = newword + 1; p[1] != '\0'; p++) + { + p[-1] = *p; + *p = '\0'; + if (good (newword, 0, 1, 0, 0)) + { + /* + * Save_cap must be called before good() is called on the + * second half, because it uses state left around by + * good(). This is unfortunate because it wastes a bit of + * time, but I don't think it's a significant performance + * problem. 
+ */ + nfirsthalf = save_cap (newword, word, firsthalf); + if (good (p + 1, 0, 1, 0, 0)) + { + nsecondhalf = save_cap (p + 1, p + 1, secondhalf); + for (firstno = 0; firstno < nfirsthalf; firstno++) + { + firstp = &firsthalf[firstno][p - newword]; + for (secondno = 0; secondno < nsecondhalf; secondno++) + { + *firstp = ' '; + icharcpy (firstp + 1, secondhalf[secondno]); + if (insert (firsthalf[firstno]) < 0) + return; + *firstp = '-'; + if (insert (firsthalf[firstno]) < 0) + return; + } + } + } + } + } +} + +/* + * \param word + * \param pfxopts Options to apply to prefixes + */ +int +ISpellChecker::compoundgood (ichar_t *word, int pfxopts) +{ + ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN]; + register ichar_t * p; + register ichar_t savech; + long secondcap; /* Capitalization of 2nd half */ + + /* + ** If compoundflag is COMPOUND_NEVER, compound words are never ok. + */ + if (m_hashheader.compoundflag == COMPOUND_NEVER) + return 0; + /* + ** Test for a possible compound word (for languages like German that + ** form lots of compounds). + ** + ** This is similar to missingspace, except we quit on the first hit, + ** and we won't allow either member of the compound to be a single + ** letter. + ** + ** We don't do words of length less than 2 * compoundmin, since + ** both halves must at least compoundmin letters. + */ + if (icharlen (word) < 2 * m_hashheader.compoundmin) + return 0; + icharcpy (newword, word); + p = newword + m_hashheader.compoundmin; + for ( ; p[m_hashheader.compoundmin - 1] != 0; p++) + { + savech = *p; + *p = 0; + if (good (newword, 0, 0, pfxopts, FF_COMPOUNDONLY)) + { + *p = savech; + if (good (p, 0, 1, FF_COMPOUNDONLY, 0) + || compoundgood (p, FF_COMPOUNDONLY)) + { + secondcap = whatcap (p); + switch (whatcap (newword)) + { + case ANYCASE: + case CAPITALIZED: + case FOLLOWCASE: /* Followcase can have l.c. 
suffix */ + return secondcap == ANYCASE; + case ALLCAPS: + return secondcap == ALLCAPS; + } + } + } + else + *p = savech; + } + return 0; +} + +/* + * \param word + */ +void +ISpellChecker::transposedletter (ichar_t *word) +{ + ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN]; + register ichar_t * p; + register ichar_t temp; + + icharcpy (newword, word); + for (p = newword; p[1] != 0; p++) + { + temp = *p; + *p = p[1]; + p[1] = temp; + if (good (newword, 0, 1, 0, 0)) + { + if (ins_cap (newword, word) < 0) + return; + } + temp = *p; + *p = p[1]; + p[1] = temp; + } +} + +/*! + * Insert one or more correctly capitalized versions of word + * + * \param word + * \param pattern + * + * \return + */ +int +ISpellChecker::ins_cap (ichar_t *word, ichar_t *pattern) +{ + int i; /* Index into savearea */ + int nsaved; /* No. of words saved */ + ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]; + + nsaved = save_cap (word, pattern, savearea); + for (i = 0; i < nsaved; i++) + { + if (insert (savearea[i]) < 0) + return -1; + } + return 0; +} + +/*! + * Save one or more correctly capitalized versions of word + * + * \param word Word to save + * \param pattern Prototype capitalization pattern + * \param savearea Room to save words + * + * \return + */ +int +ISpellChecker::save_cap (ichar_t *word, ichar_t *pattern, + ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]) +{ + int hitno; /* Index into hits array */ + int nsaved; /* Number of words saved */ + int preadd; /* No. chars added to front of root */ + int prestrip; /* No. chars stripped from front */ + int sufadd; /* No. chars added to back of root */ + int sufstrip; /* No. 
chars stripped from back */ + + if (*word == 0) + return 0; + + for (hitno = m_numhits, nsaved = 0; --hitno >= 0 && nsaved < MAX_CAPS; ) + { + if (m_hits[hitno].prefix) + { + prestrip = m_hits[hitno].prefix->stripl; + preadd = m_hits[hitno].prefix->affl; + } + else + prestrip = preadd = 0; + if (m_hits[hitno].suffix) + { + sufstrip = m_hits[hitno].suffix->stripl; + sufadd = m_hits[hitno].suffix->affl; + } + else + sufadd = sufstrip = 0; + save_root_cap (word, pattern, prestrip, preadd, + sufstrip, sufadd, + m_hits[hitno].dictent, m_hits[hitno].prefix, m_hits[hitno].suffix, + savearea, &nsaved); + } + return nsaved; +} + +/* + * \param word + * \param pattern + * \param prestrip + * \param preadd + * \param sufstrip + * \param sufadd + * \param firstdent + * \param pfxent + * \param sufent + * + * \return + */ +int +ISpellChecker::ins_root_cap (ichar_t *word, ichar_t *pattern, + int prestrip, int preadd, int sufstrip, int sufadd, + struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent) +{ + int i; /* Index into savearea */ + ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]; + int nsaved; /* Number of words saved */ + + nsaved = 0; + save_root_cap (word, pattern, prestrip, preadd, sufstrip, sufadd, + firstdent, pfxent, sufent, savearea, &nsaved); + for (i = 0; i < nsaved; i++) + { + if (insert (savearea[i]) < 0) + return -1; + } + return 0; +} + +/* ARGSUSED */ +/*! + * \param word Word to be saved + * \param pattern Capitalization pattern + * \param prestrip No. chars stripped from front + * \param preadd No. chars added to front of root + * \param sufstrip No. chars stripped from back + * \param sufadd No. 
chars added to back of root + * \param firstdent First dent for root + * \param pfxent Pfx-flag entry for word + * \param sufent Sfx-flag entry for word + * \param savearea Room to save words + * \param nsaved Number saved so far (updated) + */ +void +ISpellChecker::save_root_cap (ichar_t *word, ichar_t *pattern, + int prestrip, int preadd, int sufstrip, int sufadd, + struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent, + ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN], + int * nsaved) +{ +#ifndef NO_CAPITALIZATION_SUPPORT + register struct dent * dent; +#endif /* NO_CAPITALIZATION_SUPPORT */ + int firstisupper; + ichar_t newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; +#ifndef NO_CAPITALIZATION_SUPPORT + register ichar_t * p; + int len; + int i; + int limit; +#endif /* NO_CAPITALIZATION_SUPPORT */ + + if (*nsaved >= MAX_CAPS) + return; + icharcpy (newword, word); + firstisupper = myupper (pattern[0]); +#ifdef NO_CAPITALIZATION_SUPPORT + /* + ** Apply the old, simple-minded capitalization rules. 
+ */ + if (firstisupper) + { + if (myupper (pattern[1])) + upcase (newword); + else + { + lowcase (newword); + newword[0] = mytoupper (newword[0]); + } + } + else + lowcase (newword); + icharcpy (savearea[*nsaved], newword); + (*nsaved)++; + return; +#else /* NO_CAPITALIZATION_SUPPORT */ +#define flagsareok(dent) \ + ((pfxent == NULL \ + || TSTMASKBIT (dent->mask, pfxent->flagbit)) \ + && (sufent == NULL \ + || TSTMASKBIT (dent->mask, sufent->flagbit))) + + dent = firstdent; + if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS) + { + upcase (newword); /* Uppercase required */ + icharcpy (savearea[*nsaved], newword); + (*nsaved)++; + return; + } + for (p = pattern; *p; p++) + { + if (mylower (*p)) + break; + } + if (*p == 0) + { + upcase (newword); /* Pattern was all caps */ + icharcpy (savearea[*nsaved], newword); + (*nsaved)++; + return; + } + for (p = pattern + 1; *p; p++) + { + if (myupper (*p)) + break; + } + if (*p == 0) + { + /* + ** The pattern was all-lower or capitalized. If that's + ** legal, insert only that version. 
+ */ + if (firstisupper) + { + if (captype (dent->flagfield) == CAPITALIZED + || captype (dent->flagfield) == ANYCASE) + { + lowcase (newword); + newword[0] = mytoupper (newword[0]); + icharcpy (savearea[*nsaved], newword); + (*nsaved)++; + return; + } + } + else + { + if (captype (dent->flagfield) == ANYCASE) + { + lowcase (newword); + icharcpy (savearea[*nsaved], newword); + (*nsaved)++; + return; + } + } + while (dent->flagfield & MOREVARIANTS) + { + dent = dent->next; + if (captype (dent->flagfield) == FOLLOWCASE + || !flagsareok (dent)) + continue; + if (firstisupper) + { + if (captype (dent->flagfield) == CAPITALIZED) + { + lowcase (newword); + newword[0] = mytoupper (newword[0]); + icharcpy (savearea[*nsaved], newword); + (*nsaved)++; + return; + } + } + else + { + if (captype (dent->flagfield) == ANYCASE) + { + lowcase (newword); + icharcpy (savearea[*nsaved], newword); + (*nsaved)++; + return; + } + } + } + } + /* + ** Either the sample had complex capitalization, or the simple + ** capitalizations (all-lower or capitalized) are illegal. + ** Insert all legal capitalizations, including those that are + ** all-lower or capitalized. If the prototype is capitalized, + ** capitalized all-lower samples. Watch out for affixes. + */ + dent = firstdent; + p = strtosichar (dent->word, 1); + len = icharlen (p); + if (dent->flagfield & MOREVARIANTS) + dent = dent->next; /* Skip place-holder entry */ + for ( ; ; ) + { + if (flagsareok (dent)) + { + if (captype (dent->flagfield) != FOLLOWCASE) + { + lowcase (newword); + if (firstisupper || captype (dent->flagfield) == CAPITALIZED) + newword[0] = mytoupper (newword[0]); + icharcpy (savearea[*nsaved], newword); + (*nsaved)++; + if (*nsaved >= MAX_CAPS) + return; + } + else + { + /* Followcase is the tough one. 
*/ + p = strtosichar (dent->word, 1); + memmove ( + reinterpret_cast<char *>(newword + preadd), + reinterpret_cast<char *>(p + prestrip), + (len - prestrip - sufstrip) * sizeof (ichar_t)); + if (myupper (p[prestrip])) + { + for (i = 0; i < preadd; i++) + newword[i] = mytoupper (newword[i]); + } + else + { + for (i = 0; i < preadd; i++) + newword[i] = mytolower (newword[i]); + } + limit = len + preadd + sufadd - prestrip - sufstrip; + i = len + preadd - prestrip - sufstrip; + p += len - sufstrip - 1; + if (myupper (*p)) + { + for (p = newword + i; i < limit; i++, p++) + *p = mytoupper (*p); + } + else + { + for (p = newword + i; i < limit; i++, p++) + *p = mytolower (*p); + } + icharcpy (savearea[*nsaved], newword); + (*nsaved)++; + if (*nsaved >= MAX_CAPS) + return; + } + } + if ((dent->flagfield & MOREVARIANTS) == 0) + break; /* End of the line */ + dent = dent->next; + } + return; +#endif /* NO_CAPITALIZATION_SUPPORT */ +} + + diff --git a/tdespell2/plugins/ispell/good.cpp b/tdespell2/plugins/ispell/good.cpp new file mode 100644 index 000000000..351106d99 --- /dev/null +++ b/tdespell2/plugins/ispell/good.cpp @@ -0,0 +1,431 @@ +/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* enchant + * Copyright (C) 2003 Dom Lachowicz + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. + */ + +/* + * good.c - see if a word or its root word + * is in the dictionary. + * + * Pace Willisson, 1983 + * + * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * 4. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Geoff Kuenning and + * other unpaid contributors. + * 5. The name of Geoff Kuenning may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $Log$ + * Revision 1.1 2004/01/31 16:44:12 zrusin + * ISpell plugin. + * + * Revision 1.4 2003/08/14 17:51:26 dom + * update license - exception clause should be Lesser GPL + * + * Revision 1.3 2003/07/28 20:40:25 dom + * fix up the license clause, further win32-registry proof some directory getting functions + * + * Revision 1.2 2003/07/16 22:52:37 dom + * LGPL + exception license + * + * Revision 1.1 2003/07/15 01:15:04 dom + * ispell enchant backend + * + * Revision 1.2 2003/01/29 05:50:11 hippietrail + * + * Fixed my mess in EncodingManager. + * Changed many C casts to C++ casts. + * + * Revision 1.1 2003/01/24 05:52:32 hippietrail + * + * Refactored ispell code. Old ispell global variables had been put into + * an allocated structure, a pointer to which was passed to many functions. 
+ * I have now made all such functions and variables private members of the + * ISpellChecker class. It was C OO, now it's C++ OO. + * + * I've fixed the makefiles and tested compilation but am unable to test + * operation. Please back out my changes if they cause problems which + * are not obvious or easy to fix. + * + * Revision 1.6 2003/01/06 18:48:38 dom + * ispell cleanup, start of using new 'add' save features + * + * Revision 1.5 2002/09/19 05:31:15 hippietrail + * + * More Ispell cleanup. Conditional globals and DEREF macros are removed. + * K&R function declarations removed, converted to Doxygen style comments + * where possible. No code has been changed (I hope). Compiles for me but + * unable to test. + * + * Revision 1.4 2002/09/17 03:03:29 hippietrail + * + * After seeking permission on the developer list I've reformatted all the + * spelling source which seemed to have parts which used 2, 3, 4, and 8 + * spaces for tabs. It should all look good with our standard 4-space + * tabs now. + * I've concentrated just on indentation in the actual code. More prettying + * could be done. + * * NO code changes were made * + * + * Revision 1.3 2002/09/13 17:20:12 mpritchett + * Fix more warnings for Linux build + * + * Revision 1.2 2001/05/12 16:05:42 thomasf + * Big pseudo changes to ispell to make it pass around a structure rather + * than rely on all sorts of gloabals willy nilly here and there. Also + * fixed our spelling class to work with accepting suggestions once more. + * This code is dirty, gross and ugly (not to mention still not supporting + * multiple hash sized just yet) but it works on my machine and will no + * doubt break other machines. + * + * Revision 1.1 2001/04/15 16:01:24 tomas_f + * moving to spell/xp + * + * Revision 1.5 2000/02/09 22:35:25 sterwill + * Clean up some warnings + * + * Revision 1.4 1998/12/29 14:55:32 eric + * + * I've doctored the ispell code pretty extensively here. It is now + * warning-free on Win32. 
It also *works* on Win32 now, since I + * replaced all the I/O calls with ANSI standard ones. + * + * Revision 1.3 1998/12/28 23:11:30 eric + * + * modified spell code and integration to build on Windows. + * This is still a hack. + * + * Actually, it doesn't yet WORK on Windows. It just builds. + * SpellCheckInit is failing for some reason. + * + * Revision 1.2 1998/12/28 22:16:22 eric + * + * These changes begin to incorporate the spell checker into AbiWord. Most + * of this is a hack. + * + * 1. added other/spell to the -I list in config/abi_defs + * 2. replaced other/spell/Makefile with one which is more like + * our build system. + * 3. added other/spell to other/Makefile so that the build will now + * dive down and build the spell check library. + * 4. added the AbiSpell library to the Makefiles in wp/main + * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp. + * This call is a HACK and should be replaced with something + * proper later. + * 6. added code to fv_View.cpp as follows: + * whenever you double-click on a word, the spell checker + * verifies that word and prints its status to stdout. + * + * Caveats: + * 1. This will break the Windows build. I'm going to work on fixing it + * now. + * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash. + * The dictionary location is currently hard-coded. This will be + * fixed as well. + * + * Anyway, such as it is, it works. + * + * Revision 1.1 1998/12/28 18:04:43 davet + * Spell checker code stripped from ispell. At this point, there are + * two external routines... the Init routine, and a check-a-word routine + * which returns a boolean value, and takes a 16 bit char string. + * The code resembles the ispell code as much as possible still. + * + * Revision 1.43 1994/11/02 06:56:05 geoff + * Remove the anyword feature, which I've decided is a bad idea. 
+ * + * Revision 1.42 1994/10/25 05:45:59 geoff + * Add support for an affix that will work with any word, even if there's + * no explicit flag. + * + * Revision 1.41 1994/05/24 06:23:06 geoff + * Let tgood decide capitalization questions, rather than doing it ourselves. + * + * Revision 1.40 1994/05/17 06:44:10 geoff + * Add support for controlled compound formation and the COMPOUNDONLY + * option to affix flags. + * + * Revision 1.39 1994/01/25 07:11:31 geoff + * Get rid of all old RCS log lines in preparation for the 3.1 release. + * + */ + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "ispell_checker.h" + + +int good P ((ichar_t * word, int ignoreflagbits, int allhits, + int pfxopts, int sfxopts)); + +#ifndef NO_CAPITALIZATION_SUPPORT + +/*! +** See if this particular capitalization (dent) is legal with these +** particular affixes. +** +** \param dent +** \param hit +** +** \return +*/ +static int entryhasaffixes (struct dent *dent, struct success *hit) +{ + if (hit->prefix && !TSTMASKBIT (dent->mask, hit->prefix->flagbit)) + return 0; + if (hit->suffix && !TSTMASKBIT (dent->mask, hit->suffix->flagbit)) + return 0; + return 1; /* Yes, these affixes are legal */ +} + +/* + * \param word + * \param hit + * \param len + * + * \return + */ +int ISpellChecker::cap_ok (ichar_t *word, struct success *hit, int len) +{ + register ichar_t * dword; + register ichar_t * w; + register struct dent * dent; + ichar_t dentword[INPUTWORDLEN + MAXAFFIXLEN]; + int preadd; + int prestrip; + int sufadd; + ichar_t * limit; + long thiscap; + long dentcap; + + thiscap = whatcap (word); + /* + ** All caps is always legal, regardless of affixes. 
+ */ + preadd = prestrip = sufadd = 0; + if (thiscap == ALLCAPS) + return 1; + else if (thiscap == FOLLOWCASE) + { + /* Set up some constants for the while(1) loop below */ + if (hit->prefix) + { + preadd = hit->prefix->affl; + prestrip = hit->prefix->stripl; + } + else + preadd = prestrip = 0; + sufadd = hit->suffix ? hit->suffix->affl : 0; + } + /* + ** Search the variants for one that matches what we have. Note + ** that thiscap can't be ALLCAPS, since we already returned + ** for that case. + */ + dent = hit->dictent; + for ( ; ; ) + { + dentcap = captype (dent->flagfield); + if (dentcap != thiscap) + { + if (dentcap == ANYCASE && thiscap == CAPITALIZED + && entryhasaffixes (dent, hit)) + return 1; + } + else /* captypes match */ + { + if (thiscap != FOLLOWCASE) + { + if (entryhasaffixes (dent, hit)) + return 1; + } + else + { + /* + ** Make sure followcase matches exactly. + ** Life is made more difficult by the + ** possibility of affixes. Start with + ** the prefix. + */ + strtoichar (dentword, dent->word, INPUTWORDLEN, 1); + dword = dentword; + limit = word + preadd; + if (myupper (dword[prestrip])) + { + for (w = word; w < limit; w++) + { + if (mylower (*w)) + goto doublecontinue; + } + } + else + { + for (w = word; w < limit; w++) + { + if (myupper (*w)) + goto doublecontinue; + } + } + dword += prestrip; + /* Do root part of word */ + limit = dword + len - preadd - sufadd; + while (dword < limit) + { + if (*dword++ != *w++) + goto doublecontinue; + } + /* Do suffix */ + dword = limit - 1; + if (myupper (*dword)) + { + for ( ; *w; w++) + { + if (mylower (*w)) + goto doublecontinue; + } + } + else + { + for ( ; *w; w++) + { + if (myupper (*w)) + goto doublecontinue; + } + } + /* + ** All failure paths go to "doublecontinue," + ** so if we get here it must match. 
+ */ + if (entryhasaffixes (dent, hit)) + return 1; + doublecontinue: ; + } + } + if ((dent->flagfield & MOREVARIANTS) == 0) + break; + dent = dent->next; + } + + /* No matches found */ + return 0; +} +#endif + +#ifndef NO_CAPITALIZATION_SUPPORT +/*! + * \param w Word to look up + * \param ignoreflagbits NZ to ignore affix flags in dict + * \param allhits NZ to ignore case, get every hit + * \param pfxopts Options to apply to prefixes + * \param sfxopts Options to apply to suffixes + * + * \return + */ +int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts) +#else +/* ARGSUSED */ +int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int dummy, int pfxopts, int sfxopts) +#endif +{ + ichar_t nword[INPUTWORDLEN + MAXAFFIXLEN]; + register ichar_t * p; + register ichar_t * q; + register int n; + register struct dent * dp; + + /* + ** Make an uppercase copy of the word we are checking. + */ + for (p = w, q = nword; *p; ) + *q++ = mytoupper (*p++); + *q = 0; + n = q - nword; + + m_numhits = 0; + + if ((dp = ispell_lookup (nword, 1)) != NULL) + { + m_hits[0].dictent = dp; + m_hits[0].prefix = NULL; + m_hits[0].suffix = NULL; +#ifndef NO_CAPITALIZATION_SUPPORT + if (allhits || cap_ok (w, &m_hits[0], n)) + m_numhits = 1; +#else + m_numhits = 1; +#endif + } + + if (m_numhits && !allhits) + return 1; + + /* try stripping off affixes */ + + chk_aff (w, nword, n, ignoreflagbits, allhits, pfxopts, sfxopts); + + return m_numhits; +} + + + + diff --git a/tdespell2/plugins/ispell/hash.cpp b/tdespell2/plugins/ispell/hash.cpp new file mode 100644 index 000000000..03bd880bb --- /dev/null +++ b/tdespell2/plugins/ispell/hash.cpp @@ -0,0 +1,204 @@ +/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* enchant + * Copyright (C) 2003 Dom Lachowicz + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by 
the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. + */ + +/* + * hash.c - a simple hash function for ispell + * + * Pace Willisson, 1983 + * + * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All modifications to the source code must be clearly marked as + * such. 
Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * 4. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Geoff Kuenning and + * other unpaid contributors. + * 5. The name of Geoff Kuenning may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $Log$ + * Revision 1.1 2004/01/31 16:44:12 zrusin + * ISpell plugin. + * + * Revision 1.4 2003/08/14 17:51:27 dom + * update license - exception clause should be Lesser GPL + * + * Revision 1.3 2003/07/28 20:40:26 dom + * fix up the license clause, further win32-registry proof some directory getting functions + * + * Revision 1.2 2003/07/16 22:52:39 dom + * LGPL + exception license + * + * Revision 1.1 2003/07/15 01:15:05 dom + * ispell enchant backend + * + * Revision 1.2 2003/01/29 05:50:11 hippietrail + * + * Fixed my mess in EncodingManager. + * Changed many C casts to C++ casts. 
+ * + * Revision 1.1 2003/01/24 05:52:33 hippietrail + * + * Refactored ispell code. Old ispell global variables had been put into + * an allocated structure, a pointer to which was passed to many functions. + * I have now made all such functions and variables private members of the + * ISpellChecker class. It was C OO, now it's C++ OO. + * + * I've fixed the makefiles and tested compilation but am unable to test + * operation. Please back out my changes if they cause problems which + * are not obvious or easy to fix. + * + * Revision 1.5 2002/09/19 05:31:15 hippietrail + * + * More Ispell cleanup. Conditional globals and DEREF macros are removed. + * K&R function declarations removed, converted to Doxygen style comments + * where possible. No code has been changed (I hope). Compiles for me but + * unable to test. + * + * Revision 1.4 2002/09/17 03:03:29 hippietrail + * + * After seeking permission on the developer list I've reformatted all the + * spelling source which seemed to have parts which used 2, 3, 4, and 8 + * spaces for tabs. It should all look good with our standard 4-space + * tabs now. + * I've concentrated just on indentation in the actual code. More prettying + * could be done. + * * NO code changes were made * + * + * Revision 1.3 2002/09/13 17:20:13 mpritchett + * Fix more warnings for Linux build + * + * Revision 1.2 2001/05/12 16:05:42 thomasf + * Big pseudo changes to ispell to make it pass around a structure rather + * than rely on all sorts of gloabals willy nilly here and there. Also + * fixed our spelling class to work with accepting suggestions once more. + * This code is dirty, gross and ugly (not to mention still not supporting + * multiple hash sized just yet) but it works on my machine and will no + * doubt break other machines. + * + * Revision 1.1 2001/04/15 16:01:24 tomas_f + * moving to spell/xp + * + * Revision 1.3 1998/12/29 14:55:33 eric + * + * I've doctored the ispell code pretty extensively here. 
It is now + * warning-free on Win32. It also *works* on Win32 now, since I + * replaced all the I/O calls with ANSI standard ones. + * + * Revision 1.2 1998/12/28 23:11:30 eric + * + * modified spell code and integration to build on Windows. + * This is still a hack. + * + * Actually, it doesn't yet WORK on Windows. It just builds. + * SpellCheckInit is failing for some reason. + * + * Revision 1.1 1998/12/28 18:04:43 davet + * Spell checker code stripped from ispell. At this point, there are + * two external routines... the Init routine, and a check-a-word routine + * which returns a boolean value, and takes a 16 bit char string. + * The code resembles the ispell code as much as possible still. + * + * Revision 1.20 1994/01/25 07:11:34 geoff + * Get rid of all old RCS log lines in preparation for the 3.1 release. + * + */ + +#include "ispell_checker.h" + +/* + * The following hash algorithm is due to Ian Dall, with slight modifications + * by Geoff Kuenning to reflect the results of testing with the English + * dictionaries actually distributed with ispell. + */ +#define HASHSHIFT 5 + +#ifdef NO_CAPITALIZATION_SUPPORT +#define HASHUPPER(c) c +#else /* NO_CAPITALIZATION_SUPPORT */ +#define HASHUPPER(c) mytoupper(c) +#endif /* NO_CAPITALIZATION_SUPPORT */ + +/* + * \param s + * \param hashtblsize + */ +int ISpellChecker::hash (ichar_t *s, int hashtblsize) +{ + register long h = 0; + register int i; + +#ifdef ICHAR_IS_CHAR + for (i = 4; i-- && *s != 0; ) + h = (h << 8) | HASHUPPER (*s++); +#else /* ICHAR_IS_CHAR */ + for (i = 2; i-- && *s != 0; ) + h = (h << 16) | HASHUPPER (*s++); +#endif /* ICHAR_IS_CHAR */ + while (*s != 0) + { + /* + * We have to do circular shifts the hard way, since C doesn't + * have them even though the hardware probably does. Oh, well. 
+ */ + h = (h << HASHSHIFT) + | ((h >> (32 - HASHSHIFT)) & ((1 << HASHSHIFT) - 1)); + h ^= HASHUPPER (*s++); + } + return static_cast<unsigned long>(h) % hashtblsize; +} diff --git a/tdespell2/plugins/ispell/ispell.h b/tdespell2/plugins/ispell/ispell.h new file mode 100644 index 000000000..587defc07 --- /dev/null +++ b/tdespell2/plugins/ispell/ispell.h @@ -0,0 +1,801 @@ +/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* enchant + * Copyright (C) 2003 Dom Lachowicz + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. 
+ */ + +#ifndef ISPELL_H +#define ISPELL_H + +#include <sys/types.h> + +/* + * $Id$ + */ + +/* + * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * 4. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Geoff Kuenning and + * other unpaid contributors. + * 5. The name of Geoff Kuenning may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $Log$ + * Revision 1.1 2004/01/31 16:44:12 zrusin + * ISpell plugin. + * + * Revision 1.4 2003/08/14 17:51:27 dom + * update license - exception clause should be Lesser GPL + * + * Revision 1.3 2003/07/28 20:40:26 dom + * fix up the license clause, further win32-registry proof some directory getting functions + * + * Revision 1.2 2003/07/16 22:52:40 dom + * LGPL + exception license + * + * Revision 1.1 2003/07/15 01:15:06 dom + * ispell enchant backend + * + * Revision 1.10 2003/01/24 05:52:33 hippietrail + * + * Refactored ispell code. Old ispell global variables had been put into + * an allocated structure, a pointer to which was passed to many functions. + * I have now made all such functions and variables private members of the + * ISpellChecker class. It was C OO, now it's C++ OO. + * + * I've fixed the makefiles and tested compilation but am unable to test + * operation. Please back out my changes if they cause problems which + * are not obvious or easy to fix. + * + * Revision 1.9 2002/09/19 05:31:15 hippietrail + * + * More Ispell cleanup. Conditional globals and DEREF macros are removed. + * K&R function declarations removed, converted to Doxygen style comments + * where possible. No code has been changed (I hope). Compiles for me but + * unable to test. 
+ * + * Revision 1.8 2002/09/17 03:03:29 hippietrail + * + * After seeking permission on the developer list I've reformatted all the + * spelling source which seemed to have parts which used 2, 3, 4, and 8 + * spaces for tabs. It should all look good with our standard 4-space + * tabs now. + * I've concentrated just on indentation in the actual code. More prettying + * could be done. + * * NO code changes were made * + * + * Revision 1.7 2002/03/22 14:31:57 dom + * fix mg's compile problem + * + * Revision 1.6 2002/03/05 16:55:52 dom + * compound word support, tested against swedish + * + * Revision 1.5 2001/08/10 18:32:40 dom + * Spelling and iconv updates. god, i hate iconv + * + * Revision 1.4 2001/06/26 16:33:27 dom + * 128 StringChars and some other stuff + * + * Revision 1.3 2001/05/12 16:05:42 thomasf + * Big pseudo changes to ispell to make it pass around a structure rather + * than rely on all sorts of gloabals willy nilly here and there. Also + * fixed our spelling class to work with accepting suggestions once more. + * This code is dirty, gross and ugly (not to mention still not supporting + * multiple hash sized just yet) but it works on my machine and will no + * doubt break other machines. + * + * Revision 1.2 2001/04/18 00:59:36 thomasf + * Removed the duplicate declarations of variables that was causing build + * to bail. This new ispell stuff is a total mess. + * + * Revision 1.1 2001/04/15 16:01:24 tomas_f + * moving to spell/xp + * + * Revision 1.13 2001/04/13 12:33:12 tamlin + * ispell can now be used from C++ + * + * Revision 1.12 2001/03/25 01:30:02 tomb + * 1. Fixed ispell #define problems on Win32 + * 2. Changed the way that togglable toolbars are tracked so that Full + * Screen mode works right on Windows + * 3. Fixed SET_GATHER macro in ap_Win32Dialog_Options.h + * 4. Fixed Toggle Case dialog to default to Sentence Case when loaded + * 5. 
Added #define for Auto Save checkbox (though I haven't updated the + * Prefs dialog yet) + * + * Revision 1.11 2001/03/24 23:28:41 dom + * Make C++ aware and watch out for VOID on Win32 + * + * Revision 1.10 1999/12/21 18:46:29 sterwill + * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se> + * + * Revision 1.9 1999/10/20 03:19:35 paul + * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more. + * + * Revision 1.8 1999/09/29 23:33:32 justin + * Updates to the underlying ispell-based code to support suggested corrections. + * + * Revision 1.7 1999/04/13 17:12:51 jeff + * Applied "Darren O. Benham" <gecko@benham.net> spell check changes. + * Fixed crash on Win32 with the new code. + * + * Revision 1.6 1999/01/07 05:14:22 sterwill + * So it builds on Unix... it might break win32 in ispell, since ut_types + * is no longer included. This is a temporary solution to a larger problem + * of including C++ headers in C source files. + * + * Revision 1.6 1999/01/07 05:14:22 sterwill + * So it builds on Unix... it might break win32 in ispell, since ut_types + * is no longer included. This is a temporary solution to a larger problem + * of including C++ headers in C source files. + * + * Revision 1.5 1999/01/07 05:02:25 sterwill + * Checking in half-broken to avoid tree lossage + * + * Revision 1.4 1999/01/07 01:07:48 paul + * Fixed spell leaks. + * + * Revision 1.3 1998/12/29 15:03:54 eric + * + * minor fix to ispell.h to get things to compile on Linux again. + * + * Revision 1.2 1998/12/29 14:55:33 eric + * + * I've doctored the ispell code pretty extensively here. It is now + * warning-free on Win32. It also *works* on Win32 now, since I + * replaced all the I/O calls with ANSI standard ones. + * + * Revision 1.1 1998/12/28 18:04:43 davet + * Spell checker code stripped from ispell. At this point, there are + * two external routines... 
the Init routine, and a check-a-word routine + * which returns a boolean value, and takes a 16 bit char string. + * The code resembles the ispell code as much as possible still. + * + * Revision 1.68 1995/03/06 02:42:41 geoff + * Be vastly more paranoid about parenthesizing macro arguments. This + * fixes a bug in defmt.c where a complex argument was passed to + * isstringch. + * + * Revision 1.67 1995/01/03 19:24:12 geoff + * Get rid of a non-global declaration. + * + * Revision 1.66 1994/12/27 23:08:49 geoff + * Fix a lot of subtly bad assumptions about the widths of ints and longs + * which only show up on 64-bit machines like the Cray and the DEC Alpha. + * + * Revision 1.65 1994/11/02 06:56:10 geoff + * Remove the anyword feature, which I've decided is a bad idea. + * + * Revision 1.64 1994/10/25 05:46:18 geoff + * Add the FF_ANYWORD flag for defining an affix that will apply to any + * word, even if not explicitly specified. (Good for French.) + * + * Revision 1.63 1994/09/16 04:48:28 geoff + * Make stringdups and laststringch unsigned ints, and dupnos a plain + * int, so that we can handle more than 128 stringchars and stringchar + * types. + * + * Revision 1.62 1994/09/01 06:06:39 geoff + * Change erasechar/killchar to uerasechar/ukillchar to avoid + * shared-library problems on HP systems. + * + * Revision 1.61 1994/08/31 05:58:35 geoff + * Add contextoffset, used in -a mode to handle extremely long lines. + * + * Revision 1.60 1994/05/17 06:44:15 geoff + * Add support for controlled compound formation and the COMPOUNDONLY + * option to affix flags. + * + * Revision 1.59 1994/03/15 06:25:16 geoff + * Change deftflag's initialization so we can tell if -t/-n appeared. + * + * Revision 1.58 1994/02/07 05:53:28 geoff + * Add typecasts to the the 7-bit versions of ichar* routines + * + * Revision 1.57 1994/01/25 07:11:48 geoff + * Get rid of all old RCS log lines in preparation for the 3.1 release. 
+ * + */ + +#include <stdio.h> +/* #include "ut_types.h" */ + +#include "ispell_def.h" + +#ifdef __cplusplus +extern "C" { +#endif /* c++ */ + +/* largest amount that a word might be extended by adding affixes */ +#ifndef MAXAFFIXLEN +#define MAXAFFIXLEN 20 +#endif + +/* +** Number of mask bits (affix flags) supported. Must be 32, 64, 128, or +** 256. If MASKBITS is 32 or 64, there are really only 26 or 58 flags +** available, respectively. If it is 32, the flags are named with the +** 26 English uppercase letters; lowercase will be converted to uppercase. +** If MASKBITS is 64, the 58 flags are named 'A' through 'z' in ASCII +** order, including the 6 special characters from 'Z' to 'a': "[\]^_`". +** If MASKBITS is 128 or 256, all the 7-bit or 8-bit characters, +** respectively, are theoretically available, though a few (newline, slash, +** null byte) are pretty hard to actually use successfully. +** +** Note that a number of non-English affix files depend on having a +** larger value for MASKBITS. See the affix files for more +** information. +*/ + +#ifndef MASKBITS +#define MASKBITS 64 +#endif + +extern int gnMaskBits; + +/* +** C type to use for masks. This should be a type that the processor +** accesses efficiently. +** +** MASKTYPE_WIDTH must correctly reflect the number of bits in a +** MASKTYPE. Unfortunately, it is also required to be a constant at +** preprocessor time, which means you can't use the sizeof operator to +** define it. +** +** Note that MASKTYPE *must* match MASKTYPE_WIDTH or you may get +** division-by-zero errors! +*/ +#ifndef MASKTYPE +#define MASKTYPE long +#endif +#ifndef MASKTYPE_WIDTH +#define MASKTYPE_WIDTH 32 +#endif + + /* program: this should be coded now in init */ + +#if MASKBITS < MASKTYPE_WIDTH +#undef MASKBITS +#define MASKBITS MASKTYPE_WIDTH +#endif /* MASKBITS < MASKTYPE_WIDTH */ + +/* +** Maximum hash table fullness percentage. Larger numbers trade space +** for time. 
+**/ +#ifndef MAXPCT +#define MAXPCT 70 /* Expand table when 70% full */ +#endif + +/* +** Maximum number of "string" characters that can be defined in a +** language (affix) file. Don't forget that an upper/lower string +** character counts as two! +*/ +#ifndef MAXSTRINGCHARS +#define MAXSTRINGCHARS 128 +#endif /* MAXSTRINGCHARS */ + +/* +** Maximum length of a "string" character. The default is appropriate for +** nroff-style characters starting with a backslash. +*/ +#ifndef MAXSTRINGCHARLEN +#define MAXSTRINGCHARLEN 10 +#endif /* MAXSTRINGCHARLEN */ + +/* +** Maximum number of "hits" expected on a word. This is basically the +** number of different ways different affixes can produce the same word. +** For example, with "english.aff", "brothers" can be produced 3 ways: +** "brothers," "brother+s", or "broth+ers". If this is too low, no major +** harm will be done, but ispell may occasionally forget a capitalization. +*/ +#ifndef MAX_HITS +#define MAX_HITS 10 +#endif + +/* +** Maximum number of capitalization variations expected in any word. +** Besides the obvious all-lower, all-upper, and capitalized versions, +** this includes followcase variants. If this is too low, no real +** harm will be done, but ispell may occasionally fail to suggest a +** correct capitalization. +*/ +#ifndef MAX_CAPS +#define MAX_CAPS 10 +#endif /* MAX_CAPS */ + +/* buffer size to use for file names if not in sys/param.h */ +#ifndef MAXPATHLEN +#define MAXPATHLEN 512 +#endif + +/* +** Maximum language-table search size. Smaller numbers make ispell +** run faster, at the expense of more memory (the lowest reasonable value +** is 2). If a given character appears in a significant position in +** more than MAXSEARCH suffixes, it will be given its own index table. +** If you change this, define INDEXDUMP in lookup.c to be sure your +** index table looks reasonable. 
+*/ +#ifndef MAXSEARCH +#define MAXSEARCH 4 +#endif + +#if defined(__STDC__) || defined(__cplusplus) +#define P(x) x + #ifndef VOID + #define VOID void + #endif +#else /* __STDC__ */ +#define P(x) () + #ifndef VOID + #define VOID char + #endif +#define const +#endif /* __STDC__ */ + +#ifdef NO8BIT +#define SET_SIZE 128 +#else +#define SET_SIZE 256 +#endif + +#define MASKSIZE (gnMaskBits / MASKTYPE_WIDTH) + +#ifdef lint +extern int TSTMASKBIT P ((MASKTYPE * mask, int bit)); +#else /* lint */ +/* The following is really testing for MASKSIZE <= 1, but cpp can't do that */ +#define TSTMASKBIT(mask, bit) \ + ((mask)[(bit) / MASKTYPE_WIDTH] & \ + ((MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1)))) +#endif /* lint */ + +#if MASKBITS > 64 +#define FULLMASKSET +#endif + +#if MASKBITS <= 32 + #define FLAGBASE ((MASKTYPE_WIDTH) - 6) +#else + # if MASKBITS <= 64 + #define FLAGBASE ((MASKTYPE_WIDTH) - 6) + # else + #define FLAGBASE 0 + # endif +#endif + +/* +** Data type for internal word storage. If necessary, we use shorts rather +** than chars so that string characters can be encoded as a single unit. +*/ +#if (SET_SIZE + MAXSTRINGCHARS) <= 256 +#ifndef lint +#define ICHAR_IS_CHAR +#endif /* lint */ +#endif + +#ifdef ICHAR_IS_CHAR +typedef unsigned char ichar_t; /* Internal character */ +#define icharlen(s) strlen ((char *) (s)) +#define icharcpy(a, b) strcpy ((char *) (a), (char *) (b)) +#define icharcmp(a, b) strcmp ((char *) (a), (char *) (b)) +#define icharncmp(a, b, n) strncmp ((char *) (a), (char *) (b), (n)) +#define chartoichar(x) ((ichar_t) (x)) +#else +typedef unsigned short ichar_t; /* Internal character */ +#define chartoichar(x) ((ichar_t) (unsigned char) (x)) + +/* + * Structure used to record data about successful lookups; these values + * are used in the ins_root_cap routine to produce correct capitalizations. 
/*
 * Dictionary entry.
 *
 * "next" links entries into a chain and "word" points to the stored
 * spelling; the comment following this structure describes how both are
 * used for capitalization variants.
 *
 * "mask" holds the affix flag bits as two MASKTYPE words.  When
 * FULLMASKSET is not defined the entry flags (USED, KEEP, capitalization
 * type, MOREVARIANTS) share mask[1] (see the "flagfield" macro); when it
 * is defined they live in the separate "flags" member.
 *
 * NOTE(review): MASKTYPE defaults to "long" while MASKTYPE_WIDTH defaults
 * to 32; on LP64 targets long is 64 bits wide -- confirm that the two are
 * meant to differ there.
 */
struct dent
{
    struct dent * next;     /* Next entry in the chain */
    char * word;            /* Spelling of the word, see comment below */
    MASKTYPE mask[2];       /* Affix flag bits (plus entry flags unless FULLMASKSET) */
#ifdef FULLMASKSET
    char flags;             /* Entry flags when they do not fit in the mask */
#endif
};
+** A header entry is indicated by a capitalization type of ALLCAPS, +** with the MOREVARIANTS bit set. +** +** The following entries will define the individual variants. Each +** entry except the last has the MOREVARIANTS flag set, and each +** contains one of the following capitalization options: +** +** ALLCAPS The word must appear in all capitals. +** CAPITALIZED The word must be capitalized (e.g., London). +** It will also be accepted in all capitals. +** FOLLOWCASE The word must be capitalized exactly like the +** sample in the entry. Prefix (suffix) characters +** must be rendered in the case of the first (last) +** "alphabetic" character. It will also be accepted +** in all capitals. ("Alphabetic" means "mentioned +** in a 'casechars' statement".) +** ANYCASE The word may appear in lowercase, capitalized, +** or all-capitals. +** +** The "mask" field for the entry contains only the affix flag bits that +** are legal for that capitalization. The "word" field will be null +** except for FOLLOWCASE entries, where it will point to the +** correctly-capitalized spelling of the root word. +** +** It is worth discussing why the ALLCAPS option is used in +** the header entry. The header entry accepts an all-capitals +** version of the root plus every affix (this is always legal, since +** words get capitalized in headers and so forth). Further, all of +** the following variant entries will reject any all-capitals form +** that is illegal due to an affix. +** +** Finally, note that variations in the KEEP flag can cause a multiple-variant +** entry as well. For example, if the personal dictionary contains "ALPHA", +** (KEEP flag set) and the user adds "alpha" with the KEEP flag clear, a +** multiple-variant entry will be created so that "alpha" will be accepted +** but only "ALPHA" will actually be kept. 
/*
 * Language tables used to encode prefix and suffix information.
 *
 * One flagent describes a single affix rule: what to strip from the
 * root, what to append instead, which flag bit selects the rule, and a
 * per-character condition table.
 */
struct flagent
{
    ichar_t * strip;        /* String to strip off */
    ichar_t * affix;        /* Affix to append */
    short flagbit;          /* Flag bit this entry matches */
    short stripl;           /* Length of strip */
    short affl;             /* Length of affix */
    short numconds;         /* Number of character conditions */
    short flagflags;        /* Modifiers on this flag (FF_* bits below) */
    char conds[SET_SIZE + MAXSTRINGCHARS];  /* Adjusted character conditions */
};

/*
 * Bits in flagflags
 */
#define FF_CROSSPRODUCT (1 << 0)    /* Affix does cross-products */
#define FF_COMPOUNDONLY (1 << 1)    /* Affix works in compounds */

/*
 * Aid for building flag pointers: a slot holds either a pointer to a
 * further index table or the first of a list of flag entries.  Which
 * member is valid is recorded by flagptr.numents.
 */
union ptr_union
{
    struct flagptr * fp;    /* Pointer to more indexing */
    struct flagent * ent;   /* First of a list of entries */
};

/*
 * One slot of an affix index table.
 */
struct flagptr
{
    union ptr_union pu;     /* Entry list or more indexes */
    int numents;            /* If zero, pu.fp is valid */
};

/*
 * Description of a single string character type.
 */
struct strchartype
{
    char * name;            /* Name of the type */
    char * deformatter;     /* Deformatter to use */
    char * suffixes;        /* File suffixes, null-separated */
};
/*
 * Header placed at the beginning of the hash file.
 *
 * The table-sized members are dimensioned by the compile-time limits
 * SET_SIZE, MAXSTRINGCHARS and MAXSTRINGCHARLEN.  "compileoptions"
 * carries the build options (see COMPILEOPTIONS) so that they can be
 * checked for consistency, and "magic"/"magic2" bracket the structure
 * with the MAGIC number.
 *
 * Do not reorder or resize members: this structure describes data stored
 * in the hash file.
 */
struct hashheader
{
    unsigned short magic;               /* Magic number for ID */
    unsigned short compileoptions;      /* How we were compiled */
    short maxstringchars;               /* Max # string chars we support */
    short maxstringcharlen;             /* Max string char length supported */
    short compoundmin;                  /* Min length of compound parts */
    short compoundbit;                  /* Flag for compounding roots */
    int stringsize;                     /* Size of string table */
    int lstringsize;                    /* Size of language string table */
    int tblsize;                        /* No. entries in hash table */
    int stblsize;                       /* No. entries in suffix table */
    int ptblsize;                       /* No. entries in prefix table */
    int sortval;                        /* Largest sort ID assigned */
    int nstrchars;                      /* No. string chars defined */
    int nstrchartype;                   /* No. string char types */
    int strtypestart;                   /* Start of string type table */
    char nrchars[5];                    /* Nroff special characters */
    char texchars[13];                  /* TeX special characters */
    char compoundflag;                  /* Compound-word handling */
    char defhardflag;                   /* Default tryveryhard flag */
    char flagmarker;                    /* "Start-of-flags" char */
    unsigned short sortorder[SET_SIZE + MAXSTRINGCHARS];    /* Sort ordering */
    ichar_t lowerconv[SET_SIZE + MAXSTRINGCHARS];   /* Lower-conversion table */
    ichar_t upperconv[SET_SIZE + MAXSTRINGCHARS];   /* Upper-conversion table */
    char wordchars[SET_SIZE + MAXSTRINGCHARS];      /* NZ for chars found in words */
    char upperchars[SET_SIZE + MAXSTRINGCHARS];     /* NZ for uppercase chars */
    char lowerchars[SET_SIZE + MAXSTRINGCHARS];     /* NZ for lowercase chars */
    char boundarychars[SET_SIZE + MAXSTRINGCHARS];  /* NZ for boundary chars */
    char stringstarts[SET_SIZE];        /* NZ if char can start a string char */
    char stringchars[MAXSTRINGCHARS][MAXSTRINGCHARLEN + 1]; /* String chars */
    unsigned int stringdups[MAXSTRINGCHARS];    /* No. of "base" char */
    int dupnos[MAXSTRINGCHARS];         /* Dup char ID # */
    unsigned short magic2;              /* Second magic for double check */
};
characters. Note well that they take a POINTER, +** not a character. +** +** The "l_" versions set "len" to the length of the string character as a +** handy side effect. (Note that the global "laststringch" is also set, +** and sometimes used, by these macros.) +** +** The "l1_" versions go one step further and guarantee that the "len" +** field is valid for *all* characters, being set to 1 even if the macro +** returns false. This macro is a great example of how NOT to write +** readable C. +*/ +/*TF NOTE: This is actually defined in code (makedent) now */ +#if 0 +#define isstringch(ptr, canon) (isstringstart (*(ptr)) \ + && stringcharlen ((ptr), (canon)) > 0) +#define l_isstringch(ptr, len, canon) \ + (isstringstart (*(ptr)) \ + && (len = stringcharlen ((ptr), (canon))) \ + > 0) +#define l1_isstringch(ptr, len, canon) \ + (len = 1, \ + isstringstart ((unsigned char)(*(ptr))) \ + && ((len = \ + stringcharlen ((ptr), (canon))) \ + > 0 \ + ? 1 : (len = 1, 0))) +#endif + +/* + * Sizes of buffers returned by ichartosstr/strtosichar. + */ +#define ICHARTOSSTR_SIZE (INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) +#define STRTOSICHAR_SIZE ((INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) \ + * sizeof (ichar_t)) +/* TF CHANGE: We should fill this as a structure + and then use it throughout. +*/ + +/* + * Initialized variables. These are generated using macros so that they + * may be consistently declared in all programs. Numerous examples of + * usage are given below. 
+ */ +#ifdef MAIN +#define INIT(decl, init) decl = init +#else +#define INIT(decl, init) extern decl +#endif + +#ifdef MINIMENU +INIT (int minimenusize, 2); /* MUST be either 2 or zero */ +#else /* MINIMENU */ +INIT (int minimenusize, 0); /* MUST be either 2 or zero */ +#endif /* MINIMENU */ + +INIT (int eflag, 0); /* NZ for expand mode */ +INIT (int dumpflag, 0); /* NZ to do dump mode */ +INIT (int fflag, 0); /* NZ if -f specified */ +#ifndef USG +INIT (int sflag, 0); /* NZ to stop self after EOF */ +#endif +INIT (int vflag, 0); /* NZ to display characters as M-xxx */ +INIT (int xflag, DEFNOBACKUPFLAG); /* NZ to suppress backups */ +INIT (int deftflag, -1); /* NZ for TeX mode by default */ +INIT (int tflag, DEFTEXFLAG); /* NZ for TeX mode in current file */ +INIT (int prefstringchar, -1); /* Preferred string character type */ + +INIT (int terse, 0); /* NZ for "terse" mode */ + +INIT (char tempfile[MAXPATHLEN], ""); /* Name of file we're spelling into */ + +INIT (int minword, MINWORD); /* Longest always-legal word */ +INIT (int sortit, 1); /* Sort suggestions alphabetically */ +INIT (int compoundflag, -1); /* How to treat compounds: see above */ +INIT (int tryhardflag, -1); /* Always call tryveryhard */ + +INIT (char * currentfile, NULL); /* Name of current input file */ + +/* Odd numbers for math mode in LaTeX; even for LR or paragraph mode */ +INIT (int math_mode, 0); +/* P -- paragraph or LR mode + * b -- parsing a \begin statement + * e -- parsing an \end statement + * r -- parsing a \ref type of argument. + * m -- looking for a \begin{minipage} argument. 
+ */ +INIT (char LaTeX_Mode, 'P'); + +#ifdef __cplusplus +} +#endif /* c++ */ + +#endif /* ISPELL_H */ diff --git a/tdespell2/plugins/ispell/ispell_checker.cpp b/tdespell2/plugins/ispell/ispell_checker.cpp new file mode 100644 index 000000000..c07d9a55f --- /dev/null +++ b/tdespell2/plugins/ispell/ispell_checker.cpp @@ -0,0 +1,505 @@ +/* vim: set sw=8: -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* tdespell2 - adopted from Enchant + * Copyright (C) 2003 Dom Lachowicz + * Copyright (C) 2004 Zack Rusin <zack@kde.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <string> +#include <vector> + +#include "sp_spell.h" +#include "ispell_checker.h" + +#include <tqmap.h> +#include <tqdir.h> +#include <tqfileinfo.h> + +/***************************************************************************/ + +typedef struct str_ispell_map +{ + const char * lang; + const char * dict; + const char * enc; +} IspellMap; + +static const char *ispell_dirs [] = { + "/usr/lib/ispell", + "/usr/local/lib/ispell", + "/usr/local/share/ispell", + "/usr/share/ispell", + "/usr/pkg/lib", + 0 +}; +static const IspellMap ispell_map [] = { + {"ca" ,"catala.hash" ,"iso-8859-1" }, + {"ca_ES" ,"catala.hash" ,"iso-8859-1" }, + {"cs" ,"czech.hash" ,"iso-8859-2" }, + {"cs_CZ" ,"czech.hash" ,"iso-8859-2" }, + {"da" ,"dansk.hash" ,"iso-8859-1" }, + {"da_DK" ,"dansk.hash" ,"iso-8859-1" }, + {"de" ,"deutsch.hash" ,"iso-8859-1" }, + {"de_CH" ,"swiss.hash" ,"iso-8859-1" }, + {"de_AT" ,"deutsch.hash" ,"iso-8859-1" }, + {"de_DE" ,"deutsch.hash" ,"iso-8859-1" }, + {"el" ,"ellhnika.hash" ,"iso-8859-7" }, + {"el_GR" ,"ellhnika.hash" ,"iso-8859-7" }, + {"en" ,"british.hash" ,"iso-8859-1" }, + {"en_AU" ,"british.hash" ,"iso-8859-1" }, + {"en_BZ" ,"british.hash" ,"iso-8859-1" }, + {"en_CA" ,"british.hash" ,"iso-8859-1" }, + {"en_GB" ,"british.hash" ,"iso-8859-1" }, + {"en_IE" ,"british.hash" ,"iso-8859-1" }, + {"en_JM" ,"british.hash" ,"iso-8859-1" }, + {"en_NZ" ,"british.hash" ,"iso-8859-1" }, + {"en_TT" ,"british.hash" ,"iso-8859-1" }, + {"en_ZA" ,"british.hash" ,"iso-8859-1" }, + {"en_ZW" ,"british.hash" ,"iso-8859-1" }, + {"en_PH" ,"american.hash" ,"iso-8859-1" }, + {"en_US" ,"american.hash" ,"iso-8859-1" }, + {"eo" ,"esperanto.hash" ,"iso-8859-3" }, + {"es" ,"espanol.hash" ,"iso-8859-1" }, + {"es_AR" ,"espanol.hash" ,"iso-8859-1" }, + {"es_BO" ,"espanol.hash" ,"iso-8859-1" }, + {"es_CL" ,"espanol.hash" ,"iso-8859-1" }, + {"es_CO" ,"espanol.hash" ,"iso-8859-1" }, + {"es_CR" ,"espanol.hash" 
,"iso-8859-1" }, + {"es_DO" ,"espanol.hash" ,"iso-8859-1" }, + {"es_EC" ,"espanol.hash" ,"iso-8859-1" }, + {"es_ES" ,"espanol.hash" ,"iso-8859-1" }, + {"es_GT" ,"espanol.hash" ,"iso-8859-1" }, + {"es_HN" ,"espanol.hash" ,"iso-8859-1" }, + {"es_MX" ,"espanol.hash" ,"iso-8859-1" }, + {"es_NI" ,"espanol.hash" ,"iso-8859-1" }, + {"es_PA" ,"espanol.hash" ,"iso-8859-1" }, + {"es_PE" ,"espanol.hash" ,"iso-8859-1" }, + {"es_PR" ,"espanol.hash" ,"iso-8859-1" }, + {"es_PY" ,"espanol.hash" ,"iso-8859-1" }, + {"es_SV" ,"espanol.hash" ,"iso-8859-1" }, + {"es_UY" ,"espanol.hash" ,"iso-8859-1" }, + {"es_VE" ,"espanol.hash" ,"iso-8859-1" }, + {"fi" ,"finnish.hash" ,"iso-8859-1" }, + {"fi_FI" ,"finnish.hash" ,"iso-8859-1" }, + {"fr" ,"francais.hash" ,"iso-8859-1" }, + {"fr_BE" ,"francais.hash" ,"iso-8859-1" }, + {"fr_CA" ,"francais.hash" ,"iso-8859-1" }, + {"fr_CH" ,"francais.hash" ,"iso-8859-1" }, + {"fr_FR" ,"francais.hash" ,"iso-8859-1" }, + {"fr_LU" ,"francais.hash" ,"iso-8859-1" }, + {"fr_MC" ,"francais.hash" ,"iso-8859-1" }, + {"hu" ,"hungarian.hash" ,"iso-8859-2" }, + {"hu_HU" ,"hungarian.hash" ,"iso-8859-2" }, + {"ga" ,"irish.hash" ,"iso-8859-1" }, + {"ga_IE" ,"irish.hash" ,"iso-8859-1" }, + {"gl" ,"galician.hash" ,"iso-8859-1" }, + {"gl_ES" ,"galician.hash" ,"iso-8859-1" }, + {"ia" ,"interlingua.hash" ,"iso-8859-1" }, + {"it" ,"italian.hash" ,"iso-8859-1" }, + {"it_IT" ,"italian.hash" ,"iso-8859-1" }, + {"it_CH" ,"italian.hash" ,"iso-8859-1" }, + {"la" ,"mlatin.hash" ,"iso-8859-1" }, + {"la_IT" ,"mlatin.hash" ,"iso-8859-1" }, + {"lt" ,"lietuviu.hash" ,"iso-8859-13" }, + {"lt_LT" ,"lietuviu.hash" ,"iso-8859-13" }, + {"nl" ,"nederlands.hash" ,"iso-8859-1" }, + {"nl_NL" ,"nederlands.hash" ,"iso-8859-1" }, + {"nl_BE" ,"nederlands.hash" ,"iso-8859-1" }, + {"nb" ,"norsk.hash" ,"iso-8859-1" }, + {"nb_NO" ,"norsk.hash" ,"iso-8859-1" }, + {"nn" ,"nynorsk.hash" ,"iso-8859-1" }, + {"nn_NO" ,"nynorsk.hash" ,"iso-8859-1" }, + {"no" ,"norsk.hash" ,"iso-8859-1" }, + {"no_NO" 
,"norsk.hash" ,"iso-8859-1" }, + {"pl" ,"polish.hash" ,"iso-8859-2" }, + {"pl_PL" ,"polish.hash" ,"iso-8859-2" }, + {"pt" ,"brazilian.hash" ,"iso-8859-1" }, + {"pt_BR" ,"brazilian.hash" ,"iso-8859-1" }, + {"pt_PT" ,"portugues.hash" ,"iso-8859-1" }, + {"ru" ,"russian.hash" ,"koi8-r" }, + {"ru_MD" ,"russian.hash" ,"koi8-r" }, + {"ru_RU" ,"russian.hash" ,"koi8-r" }, + {"sc" ,"sardinian.hash" ,"iso-8859-1" }, + {"sc_IT" ,"sardinian.hash" ,"iso-8859-1" }, + {"sk" ,"slovak.hash" ,"iso-8859-2" }, + {"sk_SK" ,"slovak.hash" ,"iso-8859-2" }, + {"sl" ,"slovensko.hash" ,"iso-8859-2" }, + {"sl_SI" ,"slovensko.hash" ,"iso-8859-2" }, + {"sv" ,"svenska.hash" ,"iso-8859-1" }, + {"sv_SE" ,"svenska.hash" ,"iso-8859-1" }, + {"uk" ,"ukrainian.hash" ,"koi8-u" }, + {"uk_UA" ,"ukrainian.hash" ,"koi8-u" }, + {"yi" ,"yiddish-yivo.hash" ,"utf-8" } +}; + +static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) ); +static TQMap<TQString, TQString> ispell_dict_map; + + +void +ISpellChecker::try_autodetect_charset(const char * const inEncoding) +{ + if (inEncoding && strlen(inEncoding)) + { + m_translate_in = TQTextCodec::codecForName(inEncoding); + } +} + +/***************************************************************************/ +/***************************************************************************/ + +ISpellChecker::ISpellChecker() + : deftflag(-1), + prefstringchar(-1), + m_bSuccessfulInit(false), + m_BC(NULL), + m_cd(NULL), + m_cl(NULL), + m_cm(NULL), + m_ho(NULL), + m_nd(NULL), + m_so(NULL), + m_se(NULL), + m_ti(NULL), + m_te(NULL), + m_hashstrings(NULL), + m_hashtbl(NULL), + m_pflaglist(NULL), + m_sflaglist(NULL), + m_chartypes(NULL), + m_infile(NULL), + m_outfile(NULL), + m_askfilename(NULL), + m_Trynum(0), + m_translate_in(0) +{ + memset(m_sflagindex,0,sizeof(m_sflagindex)); + memset(m_pflagindex,0,sizeof(m_pflagindex)); +} + +#ifndef FREEP +#define FREEP(p) do { if (p) free(p); } while (0) +#endif + +ISpellChecker::~ISpellChecker() +{ + if 
(m_bSuccessfulInit) { + // only cleanup our mess if we were successfully initialized + + clearindex (m_pflagindex); + clearindex (m_sflagindex); + } + + FREEP(m_hashtbl); + FREEP(m_hashstrings); + FREEP(m_sflaglist); + FREEP(m_chartypes); + + delete m_translate_in; + m_translate_in = 0; +} + +bool +ISpellChecker::checkWord( const TQString& utf8Word ) +{ + ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN]; + if (!m_bSuccessfulInit) + return false; + + if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty()) + return false; + + bool retVal = false; + TQCString out; + if (!m_translate_in) + return false; + else { + /* convert to 8bit string and null terminate */ + int len_out = utf8Word.length(); + + out = m_translate_in->fromUnicode( utf8Word, len_out ); + } + + if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0)) + { + if (good(iWord, 0, 0, 1, 0) == 1 || + compoundgood(iWord, 1) == 1) + { + retVal = true; + } + } + + return retVal; +} + +TQStringList +ISpellChecker::suggestWord(const TQString& utf8Word) +{ + ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN]; + int c; + + if (!m_bSuccessfulInit) + return TQStringList(); + + if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || + utf8Word.length() == 0) + return TQStringList(); + + TQCString out; + if (!m_translate_in) + return TQStringList(); + else + { + /* convert to 8bit string and null terminate */ + + int len_out = utf8Word.length(); + out = m_translate_in->fromUnicode( utf8Word, len_out ); + } + + if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0)) + makepossibilities(iWord); + else + return TQStringList(); + + TQStringList sugg_arr; + for (c = 0; c < m_pcount; c++) + { + TQString utf8Word; + + if (!m_translate_in) + { + /* copy to 8bit string and null terminate */ + utf8Word = TQString::fromUtf8( m_possibilities[c] ); + } + else + { + /* convert to 32bit string and null terminate */ + utf8Word = m_translate_in->toUnicode( m_possibilities[c] 
); + } + + sugg_arr.append( utf8Word ); + } + + return sugg_arr; +} + +static void +s_buildHashNames (std::vector<std::string> & names, const char * dict) +{ + const char * tmp = 0; + int i = 0; + + names.clear (); + + while ( (tmp = ispell_dirs[i++]) ) { + TQCString maybeFile = TQCString( tmp ) + '/'; + maybeFile += dict; + names.push_back( maybeFile.data() ); + } +} + +static void +s_allDics() +{ + const char * tmp = 0; + int i = 0; + + while ( (tmp = ispell_dirs[i++]) ) { + TQDir dir( tmp ); + TQStringList lst = dir.entryList( "*.hash" ); + for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) { + TQFileInfo info( *it ); + for (size_t i = 0; i < size_ispell_map; i++) + { + const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i])); + if (!strcmp (info.fileName().latin1(), mapping->dict)) + { + ispell_dict_map.insert( mapping->lang, *it ); + } + } + } + } +} + +TQValueList<TQString> +ISpellChecker::allDics() +{ + if ( ispell_dict_map.empty() ) + s_allDics(); + + return ispell_dict_map.keys(); +} + +TQString +ISpellChecker::loadDictionary (const char * szdict) +{ + std::vector<std::string> dict_names; + + s_buildHashNames (dict_names, szdict); + + for (size_t i = 0; i < dict_names.size(); i++) + { + if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0) + return dict_names[i].c_str(); + } + + return TQString::null; +} + +/*! + * Load ispell dictionary hash file for given language. 
+ * + * \param szLang - The language tag ("en-US") we want to use + * \return The name of the dictionary file + */ +bool +ISpellChecker::loadDictionaryForLanguage ( const char * szLang ) +{ + TQString hashname; + + const char * encoding = NULL; + const char * szFile = NULL; + + for (size_t i = 0; i < size_ispell_map; i++) + { + const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i])); + if (!strcmp (szLang, mapping->lang)) + { + szFile = mapping->dict; + encoding = mapping->enc; + break; + } + } + + if (!szFile || !strlen(szFile)) + return false; + + alloc_ispell_struct(); + + hashname = loadDictionary(szFile); + if (hashname.isEmpty()) + return false; + + // one of the two above calls succeeded + setDictionaryEncoding (hashname, encoding); + + return true; +} + +void +ISpellChecker::setDictionaryEncoding( const TQString& hashname, const char * encoding ) +{ + /* Get Hash encoding from XML file. This should always work! */ + try_autodetect_charset(encoding); + + if (m_translate_in) + { + /* We still have to setup prefstringchar*/ + prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag + : static_cast<int *>(NULL)); + + if (prefstringchar < 0) + { + std::string teststring; + for(int n1 = 1; n1 <= 15; n1++) + { + teststring = "latin" + n1; + prefstringchar = findfiletype(teststring.c_str(), 1, + deftflag < 0 ? &deftflag : static_cast<int *>(NULL)); + if (prefstringchar >= 0) + break; + } + } + + return; /* success */ + } + + /* Test for UTF-8 first */ + prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? 
&deftflag : static_cast<int *>(NULL)); + if (prefstringchar >= 0) + { + m_translate_in = TQTextCodec::codecForName("utf8"); + } + + if (m_translate_in) + return; /* success */ + + /* Test for "latinN" */ + if (!m_translate_in) + { + /* Look for "altstringtype" names from latin1 to latin15 */ + for(int n1 = 1; n1 <= 15; n1++) + { + TQString teststring = TQString("latin%1").arg(n1); + prefstringchar = findfiletype(teststring.latin1(), 1, + deftflag < 0 ? &deftflag : static_cast<int *>(NULL)); + if (prefstringchar >= 0) + { + //FIXME: latin1 might be wrong + m_translate_in = TQTextCodec::codecForName( teststring.latin1() ); + break; + } + } + } + + /* If nothing found, use latin1 */ + if (!m_translate_in) + { + m_translate_in = TQTextCodec::codecForName("latin1"); + } +} + +bool +ISpellChecker::requestDictionary(const char *szLang) +{ + if (!loadDictionaryForLanguage (szLang)) + { + // handle a shortened version of the language tag: en_US => en + std::string shortened_dict (szLang); + size_t uscore_pos; + + if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) { + shortened_dict = shortened_dict.substr(0, uscore_pos); + if (!loadDictionaryForLanguage (shortened_dict.c_str())) + return false; + } else + return false; + } + + m_bSuccessfulInit = true; + + if (prefstringchar < 0) + m_defdupchar = 0; + else + m_defdupchar = prefstringchar; + + return true; +} diff --git a/tdespell2/plugins/ispell/ispell_checker.h b/tdespell2/plugins/ispell/ispell_checker.h new file mode 100644 index 000000000..84faa34f9 --- /dev/null +++ b/tdespell2/plugins/ispell/ispell_checker.h @@ -0,0 +1,273 @@ +/* vim: set sw=8: -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* tdespell2 - adopted from Enchant + * Copyright (C) 2003 Dom Lachowicz + * Copyright (C) 2004 Zack Rusin <zack@kde.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free 
Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. 
+ */
+
+#ifndef ISPELL_CHECKER_H
+#define ISPELL_CHECKER_H
+
+#include "ispell.h"
+
+#include <tqstringlist.h>
+#include <tqvaluelist.h>
+#include <tqtextcodec.h>
+#include <tqstring.h>
+
+
+/*
+ * ISpellChecker gathers the ispell engine into a single object: the routines
+ * and variables that were globals in the original ispell sources are private
+ * members of this class (see the section banners below, which name the
+ * ispell source file each group came from).
+ */
+class ISpellChecker
+{
+public:
+    ISpellChecker();
+    ~ISpellChecker();
+
+    // NOTE(review): presumably true when `word` is accepted by the currently
+    // loaded dictionary - confirm against ispell_checker.cpp.
+    bool checkWord(const TQString& word);
+    // NOTE(review): presumably the replacement candidates for `word` -
+    // confirm against ispell_checker.cpp.
+    TQStringList suggestWord(const TQString& word);
+
+    // NOTE(review): presumably selects/loads the dictionary for language tag
+    // `szLang` and reports success - confirm against ispell_checker.cpp.
+    bool requestDictionary (const char * szLang);
+    // NOTE(review): presumably lists the dictionaries available on this
+    // system - confirm against ispell_checker.cpp.
+    static TQValueList<TQString> allDics();
+private:
+    // Copying is disabled: both members are private and intentionally left
+    // without a definition.
+    ISpellChecker(const ISpellChecker&); // no impl
+    void operator=(const ISpellChecker&); // no impl
+
+    TQString loadDictionary (const char * szLang );
+    bool loadDictionaryForLanguage ( const char * szLang );
+    void setDictionaryEncoding ( const TQString& hashname, const char * enc );
+
+    //
+    // The member functions after this point were formerly global functions
+    // passed a context structure pointer...
+    //
+
+    void try_autodetect_charset(const char * inEncoding);
+
+    //
+    // From ispell correct.c
+    //
+
+    int casecmp P ((char * a, char * b, int canonical));
+    void makepossibilities P ((ichar_t * word));
+    int insert P ((ichar_t * word));
+#ifndef NO_CAPITALIZATION_SUPPORT
+    void wrongcapital P ((ichar_t * word));
+#endif /* NO_CAPITALIZATION_SUPPORT */
+    void wrongletter P ((ichar_t * word));
+    void extraletter P ((ichar_t * word));
+    void missingletter P ((ichar_t * word));
+    void missingspace P ((ichar_t * word));
+    int compoundgood P ((ichar_t * word, int pfxopts));
+    void transposedletter P ((ichar_t * word));
+    int ins_cap P ((ichar_t * word, ichar_t * pattern));
+    int save_cap P ((ichar_t * word, ichar_t * pattern,
+        ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]));
+    int ins_root_cap P ((ichar_t * word, ichar_t * pattern,
+        int prestrip, int preadd, int sufstrip, int sufadd,
+        struct dent * firstdent, struct flagent * pfxent,
+        struct flagent * sufent));
+    void save_root_cap P ((ichar_t * word, ichar_t * pattern,
+        int prestrip, int preadd, int sufstrip, int sufadd,
+        struct dent * firstdent, struct flagent * pfxent,
+        struct flagent * sufent,
+        ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN],
+        int * nsaved));
+
+    //
+    // From ispell good.c
+    //
+
+    int good (ichar_t* w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts);
+    void chk_aff (ichar_t* word, ichar_t* ucword, int len, int ignoreflagbits, int allhits, int pfxopts, int sfxopts);
+    // Reads the compiled hash (dictionary) file named by its argument;
+    // the definition in lookup.cpp returns 0 on success and -1 on failure.
+    int linit(char*);
+    // Searches the main hash table for `s`; the definition in lookup.cpp
+    // returns the matching entry or NULL and does not use `dotree`.
+    struct dent * ispell_lookup (ichar_t* s, int dotree);
+    int strtoichar (ichar_t* out, char* in, int outlen, int canonical);
+    int ichartostr (char* out, ichar_t* in, int outlen, int canonical);
+    char * ichartosstr (ichar_t* in, int canonical);
+    int findfiletype (const char * name, int searchnames, int * deformatter);
+    long whatcap (ichar_t* word);
+
+    /*
+       HACK: macros replaced with function implementations
+       so we could do a side-effect-free check for unicode
+       characters which aren't in hashheader
+     */
+    char myupper(ichar_t c);
+    char mylower(ichar_t c);
+    int myspace(ichar_t c);
+    char iswordch(ichar_t c);
+    char isboundarych(ichar_t c);
+    char isstringstart(ichar_t c);
+    ichar_t mytolower(ichar_t c);
+    ichar_t mytoupper(ichar_t c);
+
+#ifndef ICHAR_IS_CHAR
+    int cap_ok (ichar_t* word, struct success* hit, int len);
+
+    int hash (ichar_t* s, int hashtblsize);
+#endif
+
+    //
+    // From ispell lookup.c
+    //
+
+    // Releases the nested sub-index tables hanging off a flag index array
+    // (recursive; see lookup.cpp).
+    void clearindex P ((struct flagptr * indexp));
+    // Fills m_Try/m_Trynum from the loaded character tables; the argument is
+    // an optional list of extra word characters (linit passes NULL).
+    void initckch P ((char *));
+
+    // As defined in lookup.cpp: alloc_ispell_struct() only resets
+    // m_translate_in to 0 and free_ispell_struct() has an empty body.
+    void alloc_ispell_struct();
+    void free_ispell_struct();
+
+    //
+    // From ispell makedent.c
+    //
+
+    int addvheader P ((struct dent * ent));
+    void upcase P ((ichar_t * string));
+    void lowcase P ((ichar_t * string));
+    void chupcase P ((char * s));
+
+    int stringcharlen P ((char * bufp, int canonical));
+    ichar_t * strtosichar P ((char * in, int canonical));
+    char * printichar P ((int in));
+
+    //
+    // From ispell tgood.c
+    //
+
+    void pfx_list_chk P ((ichar_t * word, ichar_t * ucword,
+        int len, int optflags, int sfxopts, struct flagptr * ind,
+        int ignoreflagbits, int allhits));
+    void chk_suf P ((ichar_t * word, ichar_t * ucword, int len,
+        int optflags, struct flagent * pfxent, int ignoreflagbits,
+        int allhits));
+    void suf_list_chk P ((ichar_t * word, ichar_t * ucword, int len,
+        struct flagptr * ind, int optflags, struct flagent * pfxent,
+        int ignoreflagbits, int allhits));
+    int expand_pre P ((char * croot, ichar_t * rootword,
+        MASKTYPE mask[], int option, char * extra));
+    int pr_pre_expansion P ((char * croot, ichar_t * rootword,
+        struct flagent * flent, MASKTYPE mask[], int option,
+        char * extra));
+    int expand_suf P ((char * croot, ichar_t * rootword,
+        MASKTYPE mask[], int optflags, int option, char * extra));
+    int pr_suf_expansion P ((char * croot, ichar_t * rootword,
+        struct flagent * flent, int option, char * extra));
+    void forcelc P ((ichar_t * dst, int len));
+
+    /* this is used for converting from unsigned short to UCS-4 */
+    /* NOTE(review): no declaration follows the comment above in this header;
+       it looks like a leftover - confirm before relying on it. */
+
+    int deftflag; /* NZ for TeX mode by default */
+    int prefstringchar; /* Preferred string character type */
+    bool m_bSuccessfulInit;
+
+    //
+    // The members after this point were formerly global variables
+    // in the original ispell code
+    //
+
+    char * m_BC; /* backspace if not ^H */
+    char * m_cd; /* clear to end of display */
+    char * m_cl; /* clear display */
+    char * m_cm; /* cursor movement */
+    char * m_ho; /* home */
+    char * m_nd; /* non-destructive space */
+    char * m_so; /* standout */
+    char * m_se; /* standout end */
+    int m_sg; /* space taken by so/se */
+    char * m_ti; /* terminal initialization sequence */
+    char * m_te; /* terminal termination sequence */
+    int m_li; /* lines */
+    int m_co; /* columns */
+
+    char m_ctoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Current token as char */
+    ichar_t m_itoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Ctoken as ichar_t str */
+
+    int m_numhits; /* number of hits in dictionary lookups */
+    struct success
+        m_hits[MAX_HITS]; /* table of hits gotten in lookup */
+
+    char * m_hashstrings; /* Strings in hash table */
+    struct hashheader
+        m_hashheader; /* Header of hash table */
+    struct dent *
+        m_hashtbl; /* Main hash table, for dictionary */
+    int m_hashsize; /* Size of main hash table */
+
+    char m_hashname[MAXPATHLEN]; /* Name of hash table file */
+
+    int m_aflag; /* NZ if -a or -A option specified */
+    int m_cflag; /* NZ if -c (crunch) option */
+    int m_lflag; /* NZ if -l (list) option */
+    int m_incfileflag; /* whether xgets() acts exactly like gets() */
+    int m_nodictflag; /* NZ if dictionary not needed */
+
+    int m_uerasechar; /* User's erase character, from stty */
+    int m_ukillchar; /* User's kill character */
+
+    unsigned int m_laststringch; /* Number of last string character */
+    int m_defdupchar; /* Default duplicate string type */
+
+    int m_numpflags; /* Number of prefix flags in table */
+    int m_numsflags; /* Number of suffix flags in table */
+    struct flagptr m_pflagindex[SET_SIZE + MAXSTRINGCHARS];
+    /* Fast index to pflaglist */
+    struct flagent * m_pflaglist; /* Prefix flag control list */
+    struct flagptr m_sflagindex[SET_SIZE + MAXSTRINGCHARS];
+    /* Fast index to sflaglist */
+    struct flagent * m_sflaglist; /* Suffix flag control list */
+
+    struct strchartype * /* String character type collection */
+        m_chartypes;
+
+    FILE * m_infile; /* File being corrected */
+    FILE * m_outfile; /* Corrected copy of infile */
+
+    char * m_askfilename; /* File specified in -f option */
+
+    int m_changes; /* NZ if changes made to cur. file */
+    int m_readonly; /* NZ if current file is readonly */
+    int m_quit; /* NZ if we're done with this file */
+
+#define MAXPOSSIBLE 100 /* Max no. of possibilities to generate */
+
+    char m_possibilities[MAXPOSSIBLE][INPUTWORDLEN + MAXAFFIXLEN];
+    /* Table of possible corrections */
+    int m_pcount; /* Count of possibilities generated */
+    int m_maxposslen; /* Length of longest possibility */
+    int m_easypossibilities; /* Number of "easy" corrections found */
+    /* ..(defined as those using legal affixes) */
+
+    /*
+     * The following array contains a list of characters that should be tried
+     * in "missingletter." Note that lowercase characters are omitted.
+     */
+    int m_Trynum; /* Size of "Try" array */
+    ichar_t m_Try[SET_SIZE + MAXSTRINGCHARS];
+
+    TQTextCodec *m_translate_in; /* Selected translation from/to Unicode */
+};
+
+#endif /* ISPELL_CHECKER_H */
diff --git a/tdespell2/plugins/ispell/ispell_def.h b/tdespell2/plugins/ispell/ispell_def.h
new file mode 100644
index 000000000..b3d149c43
--- /dev/null
+++ b/tdespell2/plugins/ispell/ispell_def.h
@@ -0,0 +1,34 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. + */ + +/* largest word accepted from a file by any input routine, plus one */ +#ifndef INPUTWORDLEN +#define INPUTWORDLEN 100 +#endif diff --git a/tdespell2/plugins/ispell/lookup.cpp b/tdespell2/plugins/ispell/lookup.cpp new file mode 100644 index 000000000..b815ebd98 --- /dev/null +++ b/tdespell2/plugins/ispell/lookup.cpp @@ -0,0 +1,764 @@ +/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* tdespell2 - adopted from enchant + * Copyright (C) 2003 Dom Lachowicz + * Copyright (C) 2004 Zack Rusin <zack@kde.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. + */ + +/* + * lookup.c - see if a word appears in the dictionary + * + * Pace Willisson, 1983 + * + * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * 4. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Geoff Kuenning and + * other unpaid contributors. + * 5. The name of Geoff Kuenning may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $Log$ + * Revision 1.1 2004/01/31 16:44:12 zrusin + * ISpell plugin. + * + * Revision 1.7 2003/09/25 02:44:48 dom + * bug 5813 + * + * Revision 1.6 2003/08/26 13:20:40 dom + * ispell crasher fix, implement enchant_dictionary_release + * + * Revision 1.5 2003/08/26 13:08:03 uwog + * Fix segfault when the requested dictionary couldn't be found. + * + * Revision 1.4 2003/08/14 16:27:36 dom + * update some documentation + * + * Revision 1.3 2003/07/28 20:40:27 dom + * fix up the license clause, further win32-registry proof some directory getting functions + * + * Revision 1.2 2003/07/16 22:52:47 dom + * LGPL + exception license + * + * Revision 1.1 2003/07/15 01:15:07 dom + * ispell enchant backend + * + * Revision 1.3 2003/01/29 05:50:12 hippietrail + * + * Fixed my mess in EncodingManager. + * Changed many C casts to C++ casts. + * + * Revision 1.2 2003/01/25 03:16:05 hippietrail + * + * An UT_ICONV_INVALID fix which escaped the last commit. + * + * Revision 1.1 2003/01/24 05:52:34 hippietrail + * + * Refactored ispell code. Old ispell global variables had been put into + * an allocated structure, a pointer to which was passed to many functions. 
+ * I have now made all such functions and variables private members of the + * ISpellChecker class. It was C OO, now it's C++ OO. + * + * I've fixed the makefiles and tested compilation but am unable to test + * operation. Please back out my changes if they cause problems which + * are not obvious or easy to fix. + * + * Revision 1.12 2003/01/06 18:48:39 dom + * ispell cleanup, start of using new 'add' save features + * + * Revision 1.11 2002/09/19 05:31:17 hippietrail + * + * More Ispell cleanup. Conditional globals and DEREF macros are removed. + * K&R function declarations removed, converted to Doxygen style comments + * where possible. No code has been changed (I hope). Compiles for me but + * unable to test. + * + * Revision 1.10 2002/09/17 03:03:30 hippietrail + * + * After seeking permission on the developer list I've reformatted all the + * spelling source which seemed to have parts which used 2, 3, 4, and 8 + * spaces for tabs. It should all look good with our standard 4-space + * tabs now. + * I've concentrated just on indentation in the actual code. More prettying + * could be done. + * * NO code changes were made * + * + * Revision 1.9 2002/09/13 17:20:13 mpritchett + * Fix more warnings for Linux build + * + * Revision 1.8 2002/05/03 09:49:43 fjfranklin + * o hash downloader update (Gabriel Gerhardsson) + * - Comment out the "Can't open <dictionary>" printf. + * - Make the progressbar more clean at the begining of the download. + * - Add support for tarballs that doesn't have the full path included + * - Fix copyright headers on the newly added files (*HashDownloader.*) + * + * Revision 1.7 2001/08/27 19:06:30 dom + * Lots of compilation fixes + * + * Revision 1.6 2001/08/10 18:32:40 dom + * Spelling and iconv updates. god, i hate iconv + * + * Revision 1.5 2001/08/10 09:57:49 hub + * Patch by sobomax@FreeBSD.org + * #include "iconv.h" directive is missed from src/other/spell/xp/lookup.c and + * src/wp/impexp/xp/ie_imp_RTF.cpp. 
+ * See bug 1823 + * + * Revision 1.4 2001/07/18 17:46:01 dom + * Module changes, and fix compiler warnings + * + * Revision 1.3 2001/06/12 21:32:49 dom + * More ispell work... + * + * Revision 1.2 2001/05/12 16:05:42 thomasf + * Big pseudo changes to ispell to make it pass around a structure rather + * than rely on all sorts of gloabals willy nilly here and there. Also + * fixed our spelling class to work with accepting suggestions once more. + * This code is dirty, gross and ugly (not to mention still not supporting + * multiple hash sized just yet) but it works on my machine and will no + * doubt break other machines. + * + * Revision 1.1 2001/04/15 16:01:24 tomas_f + * moving to spell/xp + * + * Revision 1.7 1999/09/29 23:33:32 justin + * Updates to the underlying ispell-based code to support suggested corrections. + * + * Revision 1.6 1999/04/13 17:12:51 jeff + * Applied "Darren O. Benham" <gecko@benham.net> spell check changes. + * Fixed crash on Win32 with the new code. + * + * Revision 1.5 1999/01/07 01:07:48 paul + * Fixed spell leaks. + * + * Revision 1.5 1999/01/07 01:07:48 paul + * Fixed spell leaks. + * + * Revision 1.4 1998/12/29 14:55:33 eric + * + * I've doctored the ispell code pretty extensively here. It is now + * warning-free on Win32. It also *works* on Win32 now, since I + * replaced all the I/O calls with ANSI standard ones. + * + * Revision 1.3 1998/12/28 23:11:30 eric + * + * modified spell code and integration to build on Windows. + * This is still a hack. + * + * Actually, it doesn't yet WORK on Windows. It just builds. + * SpellCheckInit is failing for some reason. + * + * Revision 1.2 1998/12/28 22:16:22 eric + * + * These changes begin to incorporate the spell checker into AbiWord. Most + * of this is a hack. + * + * 1. added other/spell to the -I list in config/abi_defs + * 2. replaced other/spell/Makefile with one which is more like + * our build system. + * 3. 
added other/spell to other/Makefile so that the build will now + * dive down and build the spell check library. + * 4. added the AbiSpell library to the Makefiles in wp/main + * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp. + * This call is a HACK and should be replaced with something + * proper later. + * 6. added code to fv_View.cpp as follows: + * whenever you double-click on a word, the spell checker + * verifies that word and prints its status to stdout. + * + * Caveats: + * 1. This will break the Windows build. I'm going to work on fixing it + * now. + * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash. + * The dictionary location is currently hard-coded. This will be + * fixed as well. + * + * Anyway, such as it is, it works. + * + * Revision 1.1 1998/12/28 18:04:43 davet + * Spell checker code stripped from ispell. At this point, there are + * two external routines... the Init routine, and a check-a-word routine + * which returns a boolean value, and takes a 16 bit char string. + * The code resembles the ispell code as much as possible still. + * + * Revision 1.42 1995/01/08 23:23:42 geoff + * Support MSDOS_BINARY_OPEN when opening the hash file to read it in. + * + * Revision 1.41 1994/01/25 07:11:51 geoff + * Get rid of all old RCS log lines in preparation for the 3.1 release. + * + */ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "ispell_checker.h" +#include "msgs.h" + +#ifdef INDEXDUMP +static void dumpindex P ((struct flagptr * indexp, int depth)); +#endif /* INDEXDUMP */ + +int gnMaskBits = 64; + +/*! 
+ * Load a compiled ispell hash (dictionary) file into this checker.
+ *
+ * Reads and validates the header, allocates and fills the word table, the
+ * string pool and the affix flag lists, converts the offsets stored in the
+ * file into real pointers, builds the prefix/suffix lookup indexes and
+ * finally calls initckch() to set up the "Try" character list.
+ * Diagnostics are written to stderr.
+ *
+ * \param hashname name of the hash file (dictionary)
+ *
+ * \return 0 on success; -1 if the file cannot be opened, is truncated or
+ *         malformed, was built with incompatible options, or memory
+ *         cannot be allocated
+ */
+int ISpellChecker::linit (char *hashname)
+{
+    FILE* fpHash;
+
+    register int i;
+    register struct dent * dp;
+    struct flagent * entry;
+    struct flagptr * ind;
+    int nextchar, x;
+    int viazero;
+    register ichar_t * cp;
+
+    if ((fpHash = fopen (hashname, "rb")) == NULL)
+    {
+        return (-1);
+    }
+
+    /*
+    ** Close the hash file on every early error return below; previously
+    ** each of those paths leaked the open FILE.
+    */
+    struct HashFileCloser
+    {
+        FILE * fp;
+        explicit HashFileCloser (FILE * f) : fp (f) {}
+        ~HashFileCloser () { if (fp != NULL) fclose (fp); }
+    } hashFile (fpHash);
+
+    m_hashsize = fread (reinterpret_cast<char *>(&m_hashheader), 1, sizeof m_hashheader, fpHash);
+    if (m_hashsize < static_cast<int>(sizeof(m_hashheader)))
+    {
+        /* fread() never reports a negative count; the first branch is kept
+           only for parity with the original ispell code. */
+        if (m_hashsize < 0)
+            fprintf (stderr, LOOKUP_C_CANT_READ, hashname);
+        else if (m_hashsize == 0)
+            fprintf (stderr, LOOKUP_C_NULL_HASH, hashname);
+        else
+            /* Report the file actually being read (the parameter), like
+               every other diagnostic here, not the m_hashname member. */
+            fprintf (stderr,
+              LOOKUP_C_SHORT_HASH (hashname, m_hashsize,
+                static_cast<int>(sizeof m_hashheader)));
+        return (-1);
+    }
+    else if (m_hashheader.magic != MAGIC)
+    {
+        fprintf (stderr,
+          LOOKUP_C_BAD_MAGIC (hashname, static_cast<unsigned int>(MAGIC),
+            static_cast<unsigned int>(m_hashheader.magic)));
+        return (-1);
+    }
+    else if (m_hashheader.magic2 != MAGIC)
+    {
+        fprintf (stderr,
+          LOOKUP_C_BAD_MAGIC2 (hashname, static_cast<unsigned int>(MAGIC),
+            static_cast<unsigned int>(m_hashheader.magic2)));
+        return (-1);
+    }
+/* else if (hashheader.compileoptions != COMPILEOPTIONS*/
+    else if ( 1 != 1
+      || m_hashheader.maxstringchars != MAXSTRINGCHARS
+      || m_hashheader.maxstringcharlen != MAXSTRINGCHARLEN)
+    {
+        fprintf (stderr,
+          LOOKUP_C_BAD_OPTIONS (static_cast<unsigned int>(m_hashheader.compileoptions),
+            m_hashheader.maxstringchars, m_hashheader.maxstringcharlen,
+            static_cast<unsigned int>(COMPILEOPTIONS), MAXSTRINGCHARS, MAXSTRINGCHARLEN));
+        return (-1);
+    }
+
+    {
+        m_hashtbl =
+          (struct dent *)
+            calloc (static_cast<unsigned>(m_hashheader.tblsize), sizeof (struct dent));
+        m_hashsize = m_hashheader.tblsize;
+        m_hashstrings = static_cast<char *>(malloc(static_cast<unsigned>(m_hashheader.stringsize)));
+    }
+    m_numsflags = m_hashheader.stblsize;
+    m_numpflags = m_hashheader.ptblsize;
+    /* Suffix and prefix flag entries share one allocation: suffixes first,
+       prefixes immediately after (see m_pflaglist below). */
+    m_sflaglist = (struct flagent *)
+      malloc ((m_numsflags + m_numpflags) * sizeof (struct flagent));
+    if (m_hashtbl == NULL || m_hashstrings == NULL || m_sflaglist == NULL)
+    {
+        fprintf (stderr, LOOKUP_C_NO_HASH_SPACE);
+        return (-1);
+    }
+    m_pflaglist = m_sflaglist + m_numsflags;
+
+    {
+        if( fread ( m_hashstrings, 1, static_cast<unsigned>(m_hashheader.stringsize), fpHash)
+            != static_cast<size_t>(m_hashheader.stringsize) )
+        {
+            fprintf (stderr, LOOKUP_C_BAD_FORMAT);
+            fprintf (stderr, "stringsize err\n" );
+            return (-1);
+        }
+        if ( m_hashheader.compileoptions & 0x04 )
+        {
+            /* Table stored as full-size dent records: read it in one go. */
+            if( fread (reinterpret_cast<char *>(m_hashtbl), 1, static_cast<unsigned>(m_hashheader.tblsize) * sizeof(struct dent), fpHash)
+                != (static_cast<size_t>(m_hashheader.tblsize * sizeof (struct dent))))
+            {
+                fprintf (stderr, LOOKUP_C_BAD_FORMAT);
+                return (-1);
+            }
+        }
+        else
+        {
+            /* Records on disk are one MASKTYPE shorter than struct dent:
+               read them one at a time into the wider in-memory slots. */
+            for( x=0; x<m_hashheader.tblsize; x++ )
+            {
+                if( fread ( reinterpret_cast<char*>(m_hashtbl+x), sizeof( struct dent)-sizeof( MASKTYPE ), 1, fpHash)
+                    != 1)
+                {
+                    fprintf (stderr, LOOKUP_C_BAD_FORMAT);
+                    return (-1);
+                }
+            } /*for*/
+        } /*else*/
+    }
+    if (fread (reinterpret_cast<char *>(m_sflaglist), 1,
+        static_cast<unsigned>(m_numsflags+ m_numpflags) * sizeof (struct flagent), fpHash)
+        != (m_numsflags + m_numpflags) * sizeof (struct flagent))
+    {
+        fprintf (stderr, LOOKUP_C_BAD_FORMAT);
+        return (-1);
+    }
+    /* Everything has been read: close now and disarm the guard. */
+    fclose (fpHash);
+    hashFile.fp = NULL;
+
+    {
+        /* The file stores offsets (-1 meaning "none") in the pointer
+           fields; turn them into real pointers. */
+        for (i = m_hashsize, dp = m_hashtbl; --i >= 0; dp++)
+        {
+            if (dp->word == (char *) -1)
+                dp->word = NULL;
+            else
+                dp->word = &m_hashstrings [ reinterpret_cast<size_t>(dp->word) ];
+            if (dp->next == (struct dent *) -1)
+                dp->next = NULL;
+            else
+                dp->next = &m_hashtbl [ reinterpret_cast<size_t>(dp->next) ];
+        }
+    }
+
+    /* Same offset-to-pointer fix-up for the strip/affix strings of every
+       suffix and prefix flag entry. */
+    for (i = m_numsflags + m_numpflags, entry = m_sflaglist; --i >= 0; entry++)
+    {
+        if (entry->stripl)
+            entry->strip = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->strip)]);
+        else
+            entry->strip = NULL;
+        if (entry->affl)
+            entry->affix = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->affix)]);
+        else
+            entry->affix = NULL;
+    }
+    /*
+    ** Warning - 'entry' and 'i' are reset in the body of the loop
+    ** below. Don't try to optimize it by (e.g.) moving the decrement
+    ** of i into the loop condition.
+    */
+    for (i = m_numsflags, entry = m_sflaglist; i > 0; i--, entry++)
+    {
+        if (entry->affl == 0)
+        {
+            cp = NULL;
+            ind = &m_sflagindex[0];
+            viazero = 1;
+        }
+        else
+        {
+            /* Suffixes are indexed from their last character backwards. */
+            cp = entry->affix + entry->affl - 1;
+            ind = &m_sflagindex[*cp];
+            viazero = 0;
+            while (ind->numents == 0 && ind->pu.fp != NULL)
+            {
+                if (cp == entry->affix)
+                {
+                    ind = &ind->pu.fp[0];
+                    viazero = 1;
+                }
+                else
+                {
+                    ind = &ind->pu.fp[*--cp];
+                    viazero = 0;
+                }
+            }
+        }
+        if (ind->numents == 0)
+            ind->pu.ent = entry;
+        ind->numents++;
+        /*
+        ** If this index entry has more than MAXSEARCH flags in
+        ** it, we will split it into subentries to reduce the
+        ** searching. However, the split doesn't make sense in
+        ** two cases: (a) if we are already at the end of the
+        ** current affix, or (b) if all the entries in the list
+        ** have identical affixes. Since the list is sorted, (b)
+        ** is true if the first and last affixes in the list
+        ** are identical.
+        */
+        if (!viazero && ind->numents >= MAXSEARCH
+          && icharcmp (entry->affix, ind->pu.ent->affix) != 0)
+        {
+            /* Sneaky trick: back up and reprocess */
+            entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
+            i = m_numsflags - (entry - m_sflaglist);
+            ind->pu.fp =
+              (struct flagptr *)
+                calloc (static_cast<unsigned>(SET_SIZE + m_hashheader.nstrchars),
+                  sizeof (struct flagptr));
+            if (ind->pu.fp == NULL)
+            {
+                fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
+                return (-1);
+            }
+            ind->numents = 0;
+        }
+    }
+    /*
+    ** Warning - 'entry' and 'i' are reset in the body of the loop
+    ** below. Don't try to optimize it by (e.g.) moving the decrement
+    ** of i into the loop condition.
+    */
+    for (i = m_numpflags, entry = m_pflaglist; i > 0; i--, entry++)
+    {
+        if (entry->affl == 0)
+        {
+            cp = NULL;
+            ind = &m_pflagindex[0];
+            viazero = 1;
+        }
+        else
+        {
+            /* Prefixes are indexed from their first character forwards. */
+            cp = entry->affix;
+            ind = &m_pflagindex[*cp++];
+            viazero = 0;
+            while (ind->numents == 0 && ind->pu.fp != NULL)
+            {
+                if (*cp == 0)
+                {
+                    ind = &ind->pu.fp[0];
+                    viazero = 1;
+                }
+                else
+                {
+                    ind = &ind->pu.fp[*cp++];
+                    viazero = 0;
+                }
+            }
+        }
+        if (ind->numents == 0)
+            ind->pu.ent = entry;
+        ind->numents++;
+        /*
+        ** If this index entry has more than MAXSEARCH flags in
+        ** it, we will split it into subentries to reduce the
+        ** searching. However, the split doesn't make sense in
+        ** two cases: (a) if we are already at the end of the
+        ** current affix, or (b) if all the entries in the list
+        ** have identical affixes. Since the list is sorted, (b)
+        ** is true if the first and last affixes in the list
+        ** are identical.
+        */
+        if (!viazero && ind->numents >= MAXSEARCH
+          && icharcmp (entry->affix, ind->pu.ent->affix) != 0)
+        {
+            /* Sneaky trick: back up and reprocess */
+            entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
+            i = m_numpflags - (entry - m_pflaglist);
+            ind->pu.fp =
+              static_cast<struct flagptr *>(calloc(SET_SIZE + m_hashheader.nstrchars,
+                sizeof (struct flagptr)));
+            if (ind->pu.fp == NULL)
+            {
+                fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
+                return (-1);
+            }
+            ind->numents = 0;
+        }
+    }
+#ifdef INDEXDUMP
+    fprintf (stderr, "Prefix index table:\n");
+    dumpindex (m_pflagindex, 0);
+    fprintf (stderr, "Suffix index table:\n");
+    dumpindex (m_sflagindex, 0);
+#endif
+    if (m_hashheader.nstrchartype == 0)
+        m_chartypes = NULL;
+    else
+    {
+        m_chartypes = (struct strchartype *)
+          malloc (m_hashheader.nstrchartype * sizeof (struct strchartype));
+        if (m_chartypes == NULL)
+        {
+            fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
+            return (-1);
+        }
+        /* Each string character type is stored in the string pool as
+           name, deformatter, then a list of suffix strings ended by an
+           empty string. */
+        for (i = 0, nextchar = m_hashheader.strtypestart;
+          i < m_hashheader.nstrchartype;
+          i++)
+        {
+            m_chartypes[i].name = &m_hashstrings[nextchar];
+            nextchar += strlen (m_chartypes[i].name) + 1;
+            m_chartypes[i].deformatter = &m_hashstrings[nextchar];
+            nextchar += strlen (m_chartypes[i].deformatter) + 1;
+            m_chartypes[i].suffixes = &m_hashstrings[nextchar];
+            while (m_hashstrings[nextchar] != '\0')
+                nextchar += strlen (&m_hashstrings[nextchar]) + 1;
+            nextchar++;
+        }
+    }
+
+    initckch(NULL);
+
+    return (0);
+}
+
+#ifndef FREEP
+#define FREEP(p) do { if (p) free(p); } while (0)
+#endif
+
+/*!
+ * Build the m_Try list (characters tried when generating corrections) from
+ * the loaded character tables: every word character that is not lowercase,
+ * plus every boundary character. Entries are appended at m_Trynum, which
+ * this function does not reset.
+ *
+ * \param wchars Characters in -w option, if any. Each item is a literal
+ *               character, "n" followed by up to three decimal digits, or a
+ *               backslash followed by up to three octal digits. Characters
+ *               not yet marked as word characters are marked and appended.
+ *               May be NULL.
+ */
+void ISpellChecker::initckch (char *wchars)
+{
+    register ichar_t c;
+    char num[4];
+
+    for (c = 0; c < static_cast<ichar_t>(SET_SIZE+ m_hashheader.nstrchars); ++c)
+    {
+        if (iswordch (c))
+        {
+            if (!mylower (c))
+            {
+                m_Try[m_Trynum] = c;
+                ++m_Trynum;
+            }
+        }
+        else if (isboundarych (c))
+        {
+            m_Try[m_Trynum] = c;
+            ++m_Trynum;
+        }
+    }
+    if (wchars != NULL)
+    {
+        while (m_Trynum < SET_SIZE && *wchars != '\0')
+        {
+            if (*wchars != 'n' && *wchars != '\\')
+            {
+                c = *wchars;
+                ++wchars;
+            }
+            else
+            {
+                ++wchars;
+                num[0] = '\0';
+                num[1] = '\0';
+                num[2] = '\0';
+                num[3] = '\0';
+                /* isdigit() is undefined for negative values, so plain
+                   char must go through unsigned char first. */
+                if (isdigit (static_cast<unsigned char>(wchars[0])))
+                {
+                    num[0] = wchars[0];
+                    if (isdigit (static_cast<unsigned char>(wchars[1])))
+                    {
+                        num[1] = wchars[1];
+                        if (isdigit (static_cast<unsigned char>(wchars[2])))
+                            num[2] = wchars[2];
+                    }
+                }
+                if (wchars[-1] == 'n')
+                {
+                    /* "n" introducer: decimal character code */
+                    wchars += strlen (num);
+                    c = atoi (num);
+                }
+                else
+                {
+                    /* backslash introducer: octal character code */
+                    wchars += strlen (num);
+                    c = 0;
+                    if (num[0])
+                        c = num[0] - '0';
+                    if (num[1])
+                    {
+                        c <<= 3;
+                        c += num[1] - '0';
+                    }
+                    if (num[2])
+                    {
+                        c <<= 3;
+                        c += num[2] - '0';
+                    }
+                }
+            }
+/* c &= NOPARITY;*/
+            if (!m_hashheader.wordchars[c])
+            {
+                m_hashheader.wordchars[c] = 1;
+                m_hashheader.sortorder[c] = m_hashheader.sortval++;
+                m_Try[m_Trynum] = c;
+                ++m_Trynum;
+            }
+        }
+    }
+}
+
+/*!
+ * Recursively free the sub-index tables that linit() hangs off a flag
+ * index array. A slot owns a sub-table when its numents is 0 and its
+ * pu.fp is non-NULL. The array passed in is not itself freed.
+ *
+ * \param indexp first slot of an index array holding
+ *               SET_SIZE + m_hashheader.nstrchars slots
+ */
+void ISpellChecker::clearindex (struct flagptr *indexp)
+{
+    register int i;
+    for (i = 0; i < SET_SIZE + m_hashheader.nstrchars; i++, indexp++)
+    {
+        if (indexp->numents == 0 && indexp->pu.fp != NULL)
+        {
+            clearindex(indexp->pu.fp);
+            free(indexp->pu.fp);
+            /* Do not leave a dangling pointer behind: a second call on
+               the same index would otherwise free it again. */
+            indexp->pu.fp = NULL;
+        }
+    }
+}
+
+#ifdef INDEXDUMP
+/*
+ * Debug helper: print an index table to stderr, one level per indent step.
+ * NOTE(review): this is only compiled when INDEXDUMP is defined, and it
+ * still uses a K&R parameter list and the names hashheader / sflaglist,
+ * while the class declares m_hashheader / m_sflaglist - it looks unlikely
+ * to build as-is; confirm before enabling.
+ */
+static void dumpindex (indexp, depth)
+    register struct flagptr * indexp;
+    register int depth;
+{
+    register int i;
+    int j;
+    int k;
+    char stripbuf[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
+
+    for (i = 0; i < SET_SIZE + hashheader.nstrchars; i++, indexp++)
+    {
+        if (indexp->numents == 0 && indexp->pu.fp != NULL)
+        {
+            for (j = depth; --j >= 0; )
+                putc (' ', stderr);
+            if (i >= ' ' && i <= '~')
+                putc (i, stderr);
+            else
+                fprintf (stderr, "0x%x", i);
+            putc ('\n', stderr);
+            dumpindex (indexp->pu.fp, depth + 1);
+        }
+        else if (indexp->numents)
+        {
+            for (j = depth; --j >= 0; )
+                putc (' ', stderr);
+            if (i >= ' ' && i <= '~')
+                putc (i, stderr);
+            else
+                fprintf (stderr, "0x%x", i);
+            fprintf (stderr, " -> %d entries\n", indexp->numents);
+            for (k = 0; k < indexp->numents; k++)
+            {
+                for (j = depth; --j >= 0; )
+                    putc (' ', stderr);
+                if (indexp->pu.ent[k].stripl)
+                {
+                    ichartostr (stripbuf, indexp->pu.ent[k].strip,
+                      sizeof stripbuf, 1);
+                    fprintf (stderr, " entry %d (-%s,%s)\n",
+                      &indexp->pu.ent[k] - sflaglist,
+                      stripbuf,
+                      indexp->pu.ent[k].affl
+                        ? ichartosstr (indexp->pu.ent[k].affix, 1) : "-");
+                }
+                else
+                    fprintf (stderr, " entry %d (%s)\n",
+                      &indexp->pu.ent[k] - sflaglist,
+                      ichartosstr (indexp->pu.ent[k].affix, 1));
+            }
+        }
+    }
+}
+#endif
+
+/*!
+ * Look a word up in the main dictionary hash table.
+ *
+ * The word is hashed to pick a bucket and converted to a char string
+ * (a WORD_TOO_LONG diagnostic is printed when ichartostr() reports a
+ * problem); the bucket chain is then walked comparing strings for exact
+ * equality.
+ *
+ * \param s      word to look up, as an ichar_t string
+ * \param dotree not used by this implementation
+ *
+ * \return the matching dictionary entry, or NULL if the word is not found
+ */
+struct dent * ISpellChecker::ispell_lookup (ichar_t *s, int dotree)
+{
+    register struct dent * dp;
+    register char * s1;
+    char schar[INPUTWORDLEN + MAXAFFIXLEN];
+
+    dp = &m_hashtbl[hash (s, m_hashsize)];
+    if (ichartostr (schar, s, sizeof schar, 1))
+        fprintf (stderr, WORD_TOO_LONG (schar));
+    for ( ; dp != NULL; dp = dp->next)
+    {
+        /* quick strcmp, but only for equality */
+        s1 = dp->word;
+        if (s1 && s1[0] == schar[0] && strcmp (s1 + 1, schar + 1) == 0)
+            return dp;
+#ifndef NO_CAPITALIZATION_SUPPORT
+        while (dp->flagfield & MOREVARIANTS) /* Skip variations */
+            dp = dp->next;
+#endif
+    }
+    return NULL;
+}
+
+/*! Reset the Unicode translation codec pointer; nothing is allocated. */
+void ISpellChecker::alloc_ispell_struct()
+{
+    m_translate_in = 0;
+}
+
+/*! Counterpart of alloc_ispell_struct(); currently has nothing to release. */
+void ISpellChecker::free_ispell_struct()
+{
+}
diff --git a/tdespell2/plugins/ispell/makedent.cpp b/tdespell2/plugins/ispell/makedent.cpp
new file mode 100644
index 000000000..9c168dc17
--- /dev/null
+++ b/tdespell2/plugins/ispell/makedent.cpp
@@ -0,0 +1,972 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. + */ + +/* + * Copyright 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * 4. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Geoff Kuenning and + * other unpaid contributors. + * 5. 
The name of Geoff Kuenning may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $Log$ + * Revision 1.2 2004/02/01 04:46:46 zrusin + * Both ispell and aspell plugins are not working properly. We can start switching. + * + * Revision 1.1 2004/01/31 16:44:12 zrusin + * ISpell plugin. + * + * Revision 1.4 2003/08/14 17:51:28 dom + * update license - exception clause should be Lesser GPL + * + * Revision 1.3 2003/07/28 20:40:27 dom + * fix up the license clause, further win32-registry proof some directory getting functions + * + * Revision 1.2 2003/07/16 22:52:49 dom + * LGPL + exception license + * + * Revision 1.1 2003/07/15 01:15:08 dom + * ispell enchant backend + * + * Revision 1.3 2003/02/12 02:10:38 hippietrail + * + * C casts -> C++ casts + * Improved const-correctness due to changing casts + * Fixed some warnings + * + * Revision 1.2 2003/01/29 05:50:12 hippietrail + * + * Fixed my mess in EncodingManager. + * Changed many C casts to C++ casts. + * + * Revision 1.1 2003/01/24 05:52:35 hippietrail + * + * Refactored ispell code. 
Old ispell global variables had been put into + * an allocated structure, a pointer to which was passed to many functions. + * I have now made all such functions and variables private members of the + * ISpellChecker class. It was C OO, now it's C++ OO. + * + * I've fixed the makefiles and tested compilation but am unable to test + * operation. Please back out my changes if they cause problems which + * are not obvious or easy to fix. + * + * Revision 1.8 2003/01/06 18:48:40 dom + * ispell cleanup, start of using new 'add' save features + * + * Revision 1.7 2003/01/04 19:09:04 dom + * some tidying... bug pissing me off... + * + * Revision 1.6 2002/09/19 05:31:18 hippietrail + * + * More Ispell cleanup. Conditional globals and DEREF macros are removed. + * K&R function declarations removed, converted to Doxygen style comments + * where possible. No code has been changed (I hope). Compiles for me but + * unable to test. + * + * Revision 1.5 2002/09/17 03:03:30 hippietrail + * + * After seeking permission on the developer list I've reformatted all the + * spelling source which seemed to have parts which used 2, 3, 4, and 8 + * spaces for tabs. It should all look good with our standard 4-space + * tabs now. + * I've concentrated just on indentation in the actual code. More prettying + * could be done. + * * NO code changes were made * + * + * Revision 1.4 2002/09/13 17:20:13 mpritchett + * Fix more warnings for Linux build + * + * Revision 1.3 2002/03/22 14:31:57 dom + * fix mg's compile problem + * + * Revision 1.2 2001/05/12 16:05:42 thomasf + * Big pseudo changes to ispell to make it pass around a structure rather + * than rely on all sorts of gloabals willy nilly here and there. Also + * fixed our spelling class to work with accepting suggestions once more. + * This code is dirty, gross and ugly (not to mention still not supporting + * multiple hash sized just yet) but it works on my machine and will no + * doubt break other machines. 
+ * + * Revision 1.1 2001/04/15 16:01:24 tomas_f + * moving to spell/xp + * + * Revision 1.6 1999/12/21 18:46:29 sterwill + * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se> + * + * Revision 1.5 1999/10/20 03:19:35 paul + * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more. + * + * Revision 1.4 1999/04/13 17:12:51 jeff + * Applied "Darren O. Benham" <gecko@benham.net> spell check changes. + * Fixed crash on Win32 with the new code. + * + * Revision 1.3 1998/12/29 14:55:33 eric + * + * I've doctored the ispell code pretty extensively here. It is now + * warning-free on Win32. It also *works* on Win32 now, since I + * replaced all the I/O calls with ANSI standard ones. + * + * Revision 1.3 1998/12/29 14:55:33 eric + * + * I've doctored the ispell code pretty extensively here. It is now + * warning-free on Win32. It also *works* on Win32 now, since I + * replaced all the I/O calls with ANSI standard ones. + * + * Revision 1.2 1998/12/28 23:11:30 eric + * + * modified spell code and integration to build on Windows. + * This is still a hack. + * + * Actually, it doesn't yet WORK on Windows. It just builds. + * SpellCheckInit is failing for some reason. + * + * Revision 1.1 1998/12/28 18:04:43 davet + * Spell checker code stripped from ispell. At this point, there are + * two external routines... the Init routine, and a check-a-word routine + * which returns a boolean value, and takes a 16 bit char string. + * The code resembles the ispell code as much as possible still. + * + * Revision 1.45 1994/12/27 23:08:52 geoff + * Add code to makedent to reject words that contain non-word characters. + * This helps protect people who use ISO 8-bit characters when ispell + * isn't configured for that option. + * + * Revision 1.44 1994/10/25 05:46:20 geoff + * Fix some incorrect declarations in the lint versions of some routines. 
+ * + * Revision 1.43 1994/09/16 03:32:34 geoff + * Issue an error message for bad affix flags + * + * Revision 1.42 1994/02/07 04:23:43 geoff + * Correctly identify the deformatter when changing file types + * + * Revision 1.41 1994/01/25 07:11:55 geoff + * Get rid of all old RCS log lines in preparation for the 3.1 release. + * + */ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "ispell_checker.h" +#include "msgs.h" + +int makedent P ((char * lbuf, int lbuflen, struct dent * ent)); +/*int combinecaps P ((struct dent * hdr, struct dent * newent)); +#ifndef NO_CAPITALIZATION_SUPPORT +static void forcevheader P ((struct dent * hdrp, struct dent * oldp, + struct dent * newp)); +#endif / * NO_CAPITALIZATION_SUPPORT * / +static int combine_two_entries P ((struct dent * hdrp, + struct dent * oldp, struct dent * newp)); +static int acoversb P ((struct dent * enta, struct dent * entb)); +*/ +/*static int issubset P ((struct dent * ent1, struct dent * ent2)); +static void combineaffixes P ((struct dent * ent1, struct dent * ent2));*/ + +void toutent P ((FILE * outfile, struct dent * hent, + int onlykeep)); +/*static void toutword P ((FILE * outfile, char * word, + struct dent * cent)); +static void flagout P ((FILE * outfile, int flag)); +*/ +#ifndef ICHAR_IS_CHAR +ichar_t * icharcpy P ((ichar_t * out, ichar_t * in)); +int icharlen P ((ichar_t * str)); +int icharcmp P ((ichar_t * s1, ichar_t * s2)); +int icharncmp P ((ichar_t * s1, ichar_t * s2, int n)); +#endif /* ICHAR_IS_CHAR */ + +/*static int has_marker;*/ + +/* + * Fill in a directory entry, including setting the capitalization flags, and + * allocate and initialize memory for the d->word field. Returns -1 + * if there was trouble. The input word must be in canonical form. +int makedent (lbuf, lbuflen, d) +This function is not used by AbiWord. I don't know if it'll be needed for +other abi documents + */ + +#ifndef NO_CAPITALIZATION_SUPPORT +/*! 
+** Classify the capitalization of a sample entry. Returns one of the +** four capitalization codes ANYCASE, ALLCAPS, CAPITALIZED, or FOLLOWCASE. +** +** \param word +** +** \return +*/ +long +ISpellChecker::whatcap (ichar_t *word) +{ + register ichar_t * p; + + for (p = word; *p; p++) + { + if (mylower (*p)) + break; + } + if (*p == '\0') + return ALLCAPS; + else + { + for ( ; *p; p++) + { + if (myupper (*p)) + break; + } + if (*p == '\0') + { + /* + ** No uppercase letters follow the lowercase ones. + ** If there is more than one uppercase letter, it's + ** "followcase". If only the first one is capitalized, + ** it's "capitalize". If there are no capitals + ** at all, it's ANYCASE. + */ + if (myupper (word[0])) + { + for (p = word + 1; *p != '\0'; p++) + { + if (myupper (*p)) + return FOLLOWCASE; + } + return CAPITALIZED; + } + else + return ANYCASE; + } + else + return FOLLOWCASE; /* .../lower/upper */ + } +} + +/*! +** Add a variant-capitalization header to a word. This routine may be +** called even for a followcase word that doesn't yet have a header. +** +** \param dp Entry to update +** +** \return 0 if all was ok, -1 if allocation error. +*/ +int ISpellChecker::addvheader ( struct dent *dp) +{ + register struct dent * tdent; /* Copy of entry */ + + /* + ** Add a second entry with the correct capitalization, and then make + ** dp into a special dummy entry. 
+ */ + tdent = static_cast<struct dent *>(malloc(sizeof (struct dent))); + if (tdent == NULL) + { + fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word); + return -1; + } + *tdent = *dp; + if (captype (tdent->flagfield) != FOLLOWCASE) + tdent->word = NULL; + else + { + /* Followcase words need a copy of the capitalization */ + tdent->word = static_cast<char *>(malloc (static_cast<unsigned int>(strlen(tdent->word)) + 1)); + if (tdent->word == NULL) + { + fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word); + free (reinterpret_cast<char *>(tdent)); + return -1; + } + strcpy (tdent->word, dp->word); + } + chupcase (dp->word); + dp->next = tdent; + dp->flagfield &= ~CAPTYPEMASK; + dp->flagfield |= (ALLCAPS | MOREVARIANTS); + return 0; +} +#endif /* NO_CAPITALIZATION_SUPPORT */ + +/* +** Combine and resolve the entries describing two capitalizations of the same +** word. This may require allocating yet more entries. +** +** Hdrp is a pointer into a hash table. If the word covered by hdrp has +** variations, hdrp must point to the header. Newp is a pointer to temporary +** storage, and space is malloc'ed if newp is to be kept. The newp->word +** field must have been allocated with mymalloc, so that this routine may free +** the space if it keeps newp but not the word. +** +** Return value: 0 if the word was added, 1 if the word was combined +** with an existing entry, and -1 if trouble occurred (e.g., malloc). +** If 1 is returned, newp->word may have been be freed using myfree. +** +** Life is made much more difficult by the KEEP flag's possibilities. We +** must ensure that a !KEEP word doesn't find its way into the personal +** dictionary as a result of this routine's actions. However, a !KEEP +** word that has affixes must have come from the main dictionary, so it +** is acceptable to combine entries in that case (got that?). +** +** The net result of all this is a set of rules that is a bloody pain +** to figure out. 
Basically, we want to choose one of the following actions: +** +** (1) Add newp's affixes and KEEP flag to oldp, and discard newp. +** (2) Add oldp's affixes and KEEP flag to newp, replace oldp with +** newp, and discard newp. +#ifndef NO_CAPITALIZATION_SUPPORT +** (3) Insert newp as a new entry in the variants list. If there is +** currently no variant header, this requires adding one. Adding a +** header splits into two sub-cases: +** +** (3a) If oldp is ALLCAPS and the KEEP flags match, just turn it +** into the header. +** (3b) Otherwise, add a new entry to serve as the header. +** To ease list linking, this is done by copying oldp into +** the new entry, and then performing (3a). +** +** After newp has been added as a variant, its affixes and KEEP +** flag are OR-ed into the variant header. +#endif +** +** So how to choose which? The default is always case (3), which adds newp +** as a new entry in the variants list. Cases (1) and (2) are symmetrical +** except for which entry is discarded. We can use case (1) or (2) whenever +** one entry "covers" the other. "Covering" is defined as follows: +** +** (4) For entries with matching capitalization types, A covers B +** if: +** +** (4a) B's affix flags are a subset of A's, or the KEEP flags +** match, and +** (4b) either the KEEP flags match, or A's KEEP flag is set. +** (Since A has more suffixes, combining B with it won't +** cause any extra suffixes to be added to the dictionary.) +** (4c) If the words are FOLLOWCASE, the capitalizations match +** exactly. +** +#ifndef NO_CAPITALIZATION_SUPPORT +** (5) For entries with mismatched capitalization types, A covers B +** if (4a) and (4b) are true, and: +** +** (5a) B is ALLCAPS, or +** (5b) A is ANYCASE, and B is CAPITALIZED. +#endif +** +** For any "hdrp" without variants, oldp is the same as hdrp. Otherwise, +** the above tests are applied using each variant in turn for oldp. 
+int combinecaps (hdrp, newp) +static void forcevheader (hdrp, oldp, newp) +static int combine_two_entries (hdrp, oldp, newp) +static int acoversb (enta, entb) +*/ + +/* + * \param s + */ +void +ISpellChecker::upcase (ichar_t *s) +{ + + while (*s) + { + *s = mytoupper (*s); + s++; + } +} + +/* + * \param s + */ +void +ISpellChecker::lowcase (ichar_t *s) +{ + + while (*s) + { + *s = mytolower (*s); + s++; + } +} + +/*! + * Upcase variant that works on normal strings. Note that it is a lot + * slower than the normal upcase. The input must be in canonical form. + * + * \param s + */ +void +ISpellChecker::chupcase (char *s) +{ + ichar_t * is; + + is = strtosichar (s, 1); + upcase (is); + ichartostr (s, is, strlen (s) + 1, 1); +} + +/* +** See if one affix field is a subset of another. Returns NZ if ent1 +** is a subset of ent2. The KEEP flag is not taken into consideration. +static int issubset (ent1, ent2) +static void combineaffixes (ent1, ent2) +*/ + +/* +** Write out a dictionary entry, including capitalization variants. +** If onlykeep is true, only those variants with KEEP set will be +** written. +Removed -- not used by Abiword +void toutent_ (toutfile, hent, onlykeep) +static void toutword (toutfile, word, cent) +static void flagout (toutfile, flag) +*/ + +/*! + * If the string under the given pointer begins with a string character, + * return the length of that "character". If not, return 0. + * May be called any time, but it's best if "isstrstart" is first + * used to filter out unnecessary calls. + * + * As a side effect, "laststringch" is set to the number of the string + * found, or to -1 if none was found. This can be useful for such things + * as case conversion. 
+ * + * \param bufp + * \param canonical NZ if input is in canonical form + * + * \return + */ +int +ISpellChecker::stringcharlen (char *bufp, int canonical) +{ +#ifdef SLOWMULTIPLY + static char * sp[MAXSTRINGCHARS]; + static int inited = 0; +#endif /* SLOWMULTIPLY */ + register char * bufcur; + register char * stringcur; + register int stringno; + register int lowstringno; + register int highstringno; + int dupwanted; + +#ifdef SLOWMULTIPLY + if (!inited) + { + inited = 1; + for (stringno = 0; stringno < MAXSTRINGCHARS; stringno++) + sp[stringno] = &hashheader.stringchars[stringno][0]; + } +#endif /* SLOWMULTIPLY */ + lowstringno = 0; + highstringno = m_hashheader.nstrchars - 1; + dupwanted = canonical ? 0 : m_defdupchar; + while (lowstringno <= highstringno) + { + stringno = (lowstringno + highstringno) >> 1; +#ifdef SLOWMULTIPLY + stringcur = sp[stringno]; +#else /* SLOWMULTIPLY */ + stringcur = &m_hashheader.stringchars[stringno][0]; +#endif /* SLOWMULTIPLY */ + bufcur = bufp; + while (*stringcur) + { +#ifdef NO8BIT + if (((*bufcur++ ^ *stringcur) & 0x7F) != 0) +#else /* NO8BIT */ + if (*bufcur++ != *stringcur) +#endif /* NO8BIT */ + break; + /* + ** We can't use autoincrement above because of the + ** test below. 
+ */ + stringcur++; + } + if (*stringcur == '\0') + { + if (m_hashheader.dupnos[stringno] == dupwanted) + { + /* We have a match */ + m_laststringch = m_hashheader.stringdups[stringno]; +#ifdef SLOWMULTIPLY + return stringcur - sp[stringno]; +#else /* SLOWMULTIPLY */ + return stringcur - &m_hashheader.stringchars[stringno][0]; +#endif /* SLOWMULTIPLY */ + } + else + --stringcur; + } + /* No match - choose which side to search on */ +#ifdef NO8BIT + if ((*--bufcur & 0x7F) < (*stringcur & 0x7F)) + highstringno = stringno - 1; + else if ((*bufcur & 0x7F) > (*stringcur & 0x7F)) + lowstringno = stringno + 1; +#else /* NO8BIT */ + if (*--bufcur < *stringcur) + highstringno = stringno - 1; + else if (*bufcur > *stringcur) + lowstringno = stringno + 1; +#endif /* NO8BIT */ + else if (dupwanted < m_hashheader.dupnos[stringno]) + highstringno = stringno - 1; + else + lowstringno = stringno + 1; + } + m_laststringch = static_cast<unsigned int>(-1); + return 0; /* Not a string character */ +} + +/* MACROS CONVERTED TO FUNCTIONS +** These macros are similar to the ones above, but they take into account +** the possibility of string characters. Note well that they take a POINTER, +** not a character. +** +** The "l_" versions set "len" to the length of the string character as a +** handy side effect. (Note that the global "laststringch" is also set, +** and sometimes used, by these macros.) +** +** The "l1_" versions go one step further and guarantee that the "len" +** field is valid for *all* characters, being set to 1 even if the macro +** returns false. This macro is a great example of how NOT to write +** readable C. 
+*/ +#define isstringch(ptr, canon) (isstringstart (*(ptr)) \ + && stringcharlen ((ptr), (canon)) > 0) +/* +int isstringch(char *ptr, int canon) { + return (isstringstart (*(ptr)) && (len = stringcharlen ((ptr), (canon))) > 0); +} +*/ + +#define l_isstringch(ptr, len, canon) \ + (isstringstart (*(ptr)) \ + && (len = stringcharlen ((ptr), (canon))) \ + > 0) +/* +int l_isstringch(char *ptr, int len, int canon) { + return (isstringstart (*(ptr)) && (len = stringcharlen ((ptr), (canon))) > 0); +} +*/ + +#define l1_isstringch(ptr, len, canon) \ + (len = 1, \ + isstringstart ((unsigned char)(*(ptr))) \ + && ((len = \ + stringcharlen ((ptr), (canon))) \ + > 0 \ + ? 1 : (len = 1, 0))) +/* +int l1_isstringch(char *ptr, int len, int canon) { + return (len = 1, isstringstart ((unsigned char)(*(ptr))) && + ((len = stringcharlen ((ptr), (canon))) > 0 ? 1 : (len = 1, 0))); +} +*/ + +/*** END MACRO CONVERSION ***/ + +/*! + * Convert an external string to an ichar_t string. If necessary, the parity + * bit is stripped off as part of the process. + * + * \param out Where to put result + * \param in String to convert + * \param outlen Size of output buffer, *BYTES* + * \param canonical NZ if input is in canonical form + * + * \return NZ if the output string overflowed. + */ +int +ISpellChecker::strtoichar (ichar_t *out, char *in, int outlen, int canonical) +{ + register int len = 1; /* Length of next character */ + + outlen /= sizeof (ichar_t); /* Convert to an ichar_t count */ + for ( ; --outlen > 0 && *in != '\0'; in += len) + { + if (l1_isstringch (in, len , canonical)) { + *out++ = SET_SIZE + m_laststringch; + } else { + *out++ = (unsigned char)( *in ); + } + } + *out = 0; + return outlen <= 0; +} + +/*! + * Convert an ichar_t string to an external string. + * + * WARNING: the resulting string may wind up being longer than the + * original. 
In fact, even the sequence strtoichar->ichartostr may + * produce a result longer than the original, because the output form + * may use a different string type set than the original input form. + * + * \param out Where to put result + * \param in String to convert + * \param outlen Size of output buffer, bytes + * \param canonical NZ for canonical form + * + * \return NZ if the output string overflowed. + */ +int +ISpellChecker::ichartostr ( char *out, ichar_t *in, int outlen, int canonical) +{ + register int ch; /* Next character to store */ + register int i; /* Index into duplicates list */ + register char * scharp; /* Pointer into a string char */ + + while (--outlen > 0 && (ch = *in++) != 0) + { + if (ch < SET_SIZE) + *out++ = static_cast<char>(ch); + else + { + ch -= SET_SIZE; + if (!canonical) + { + for (i = m_hashheader.nstrchars; --i >= 0; ) + { + if (m_hashheader.dupnos[i] == m_defdupchar + && (static_cast<int>(m_hashheader.stringdups[i])) == ch) + { + ch = i; + break; + } + } + } + scharp = m_hashheader.stringchars[static_cast<unsigned>(ch)]; + while ((*out++ = *scharp++) != '\0') + ; + out--; + } + } + *out = '\0'; + return outlen <= 0; +} + +/*! + * Convert a string to an ichar_t, storing the result in a static area. + * + * \param in String to convert + * \param canonical NZ if input is in canonical form + * + * \return + */ +ichar_t * +ISpellChecker::strtosichar ( char *in, int canonical) +{ + static ichar_t out[STRTOSICHAR_SIZE / sizeof (ichar_t)]; + + if (strtoichar (out, in, sizeof out, canonical)) + fprintf (stderr, WORD_TOO_LONG (in)); + return out; +} + +/*! + * Convert an ichar_t to a string, storing the result in a static area. 
+ * + * \param in Internal string to convert + * \param canonical NZ for canonical conversion + * + * \return + */ +char * +ISpellChecker::ichartosstr (ichar_t *in, int canonical) +{ + static char out[ICHARTOSSTR_SIZE]; + + if (ichartostr (out, in, sizeof out, canonical)) + fprintf (stderr, WORD_TOO_LONG (out)); + return out; +} + +/*! + * Convert a single ichar to a printable string, storing the result in + * a static area. + * + * \param in + * + * \return + */ +char * +ISpellChecker::printichar (int in) +{ + static char out[MAXSTRINGCHARLEN + 1]; + + if (in < SET_SIZE) + { + out[0] = static_cast<char>(in); + out[1] = '\0'; + } + else + strcpy (out, m_hashheader.stringchars[static_cast<unsigned>(in) - SET_SIZE]); + return out; +} + +#ifndef ICHAR_IS_CHAR +/*! + * Copy an ichar_t. + * + * \param out Destination + * \param in Source + * + * \return + */ +ichar_t * +icharcpy (ichar_t *out, ichar_t *in) +{ + ichar_t * origout; /* Copy of destination for return */ + + origout = out; + while ((*out++ = *in++) != 0) + ; + return origout; +} + +/*! + * Return the length of an ichar_t. + * + * \param in String to count + * + * \return + */ +int +icharlen (ichar_t * in) +{ + register int len; /* Length so far */ + + for (len = 0; *in++ != 0; len++) + ; + return len; +} + +/*! + * Compare two ichar_t's. + * + * \param s1 + * \param s2 + * + * \return + */ +int +icharcmp (ichar_t * s1, ichar_t * s2) +{ + + while (*s1 != 0) + { + if (*s1++ != *s2++) + return *--s1 - *--s2; + } + return *s1 - *s2; +} + +/*! + * Strncmp for two ichar_t's. 
+ * + * \param s1 + * \param s2 + * \param n + * + * \return + */ +int +icharncmp (ichar_t *s1, ichar_t *s2, int n) +{ + + while (--n >= 0 && *s1 != 0) + { + if (*s1++ != *s2++) + return *--s1 - *--s2; + } + if (n < 0) + return 0; + else + return *s1 - *s2; +} + +#endif /* ICHAR_IS_CHAR */ + +/* + * \param istate + * \param name + * \param searchnames + * \param deformatter + * + * \return + */ +int +ISpellChecker::findfiletype (const char *name, int searchnames, int *deformatter) +{ + char * cp; /* Pointer into suffix list */ + int cplen; /* Length of current suffix */ + register int i; /* Index into type table */ + int len; /* Length of the name */ + + /* + * Note: for now, the deformatter is set to 1 for tex, 0 for nroff. + * Further, we assume that it's one or the other, so that a test + * for tex is sufficient. This needs to be generalized. + */ + len = strlen (name); + if (searchnames) + { + for (i = 0; i < m_hashheader.nstrchartype; i++) + { + if (strcmp (name, m_chartypes[i].name) == 0) + { + if (deformatter != NULL) + *deformatter = + (strcmp (m_chartypes[i].deformatter, "tex") == 0); + return i; + } + } + } + for (i = 0; i < m_hashheader.nstrchartype; i++) + { + for (cp = m_chartypes[i].suffixes; *cp != '\0'; cp += cplen + 1) + { + cplen = strlen (cp); + if (len >= cplen && strcmp (&name[len - cplen], cp) == 0) + { + if (deformatter != NULL) + *deformatter = + (strcmp (m_chartypes[i].deformatter, "tex") == 0); + return i; + } + } + } + return -1; +} + +/* + HACK: macros replaced with function implementations + so we could do a side-effect-free check for unicode + characters which aren't in hashheader + + TODO: this is just a workaround to keep us from crashing. + more sophisticated logic needed here. 
+*/ +char ISpellChecker::myupper(ichar_t c) +{ + if (c < (SET_SIZE + MAXSTRINGCHARS)) + return m_hashheader.upperchars[c]; + else + return 0; +} + +char ISpellChecker::mylower(ichar_t c) +{ + if (c < (SET_SIZE + MAXSTRINGCHARS)) + return m_hashheader.lowerchars[c]; + else + return 0; +} + +int myspace(ichar_t c) +{ + return ((c > 0) && (c < 0x80) && isspace(static_cast<unsigned char>(c))); +} + +char ISpellChecker::iswordch(ichar_t c) +{ + if (c < (SET_SIZE + MAXSTRINGCHARS)) + return m_hashheader.wordchars[c]; + else + return 0; +} + +char ISpellChecker::isboundarych(ichar_t c) +{ + if (c < (SET_SIZE + MAXSTRINGCHARS)) + return m_hashheader.boundarychars[c]; + else + return 0; +} + +char ISpellChecker::isstringstart(ichar_t c) +{ + if (c < (SET_SIZE)) + return m_hashheader.stringstarts[static_cast<unsigned char>(c)]; + else + return 0; +} + +ichar_t ISpellChecker::mytolower(ichar_t c) +{ + if (c < (SET_SIZE + MAXSTRINGCHARS)) + return m_hashheader.lowerconv[c]; + else + return c; +} + +ichar_t ISpellChecker::mytoupper (ichar_t c) +{ + if (c < (SET_SIZE + MAXSTRINGCHARS)) + return m_hashheader.upperconv[c]; + else + return c; +} + diff --git a/tdespell2/plugins/ispell/msgs.h b/tdespell2/plugins/ispell/msgs.h new file mode 100644 index 000000000..e3f30220c --- /dev/null +++ b/tdespell2/plugins/ispell/msgs.h @@ -0,0 +1,329 @@ +/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* enchant + * Copyright (C) 2003 Dom Lachowicz + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. + */ + +/* + * $Id$ + * + * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * 4. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Geoff Kuenning and + * other unpaid contributors. + * 5. The name of Geoff Kuenning may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * Messages header file. + * + * This file contains all text strings that are written by any of the + * C programs in the ispell package. The strings are collected here so that + * you can have the option of translating them into your local language for + * the benefit of your users. + * + * Anyone who goes to the effort of making a translation may wish to return + * the translated strings to me, geoff@ITcorp.com, so that I can include + * them in a later distribution under #ifdef control. + * + * Besides the strings in this header file, you may also want to translate + * the strings in version.h, which give the version and copyright information. 
+ * However, any translation of these strings MUST accurately preserve the + * legal rights under international law; you may wish to consult a lawyer + * about this since you will be responsible for the results of any + * incorrect translation. + * + * Most of the strings below are simple printf format strings. If the printf + * takes more than one parameter, the string is given as a parameterized + * macro in case your local language needs a different word order. + */ + +/* + * $Log$ + * Revision 1.1 2004/01/31 16:44:12 zrusin + * ISpell plugin. + * + * Revision 1.4 2003/08/14 17:51:28 dom + * update license - exception clause should be Lesser GPL + * + * Revision 1.3 2003/07/28 20:40:27 dom + * fix up the license clause, further win32-registry proof some directory getting functions + * + * Revision 1.2 2003/07/16 22:52:52 dom + * LGPL + exception license + * + * Revision 1.1 2003/07/15 01:15:08 dom + * ispell enchant backend + * + * Revision 1.1 2001/04/15 16:01:24 tomas_f + * moving to spell/xp + * + * Revision 1.1 1998/12/28 18:04:43 davet + * Spell checker code stripped from ispell. At this point, there are + * two external routines... the Init routine, and a check-a-word routine + * which returns a boolean value, and takes a 16 bit char string. + * The code resembles the ispell code as much as possible still. + * + * Revision 1.31 1994/12/27 23:08:57 geoff + * Add a message to be issued if a word contains illegal characters. + * + * Revision 1.30 1994/10/25 05:46:40 geoff + * Improve a couple of error messages relating to affix flags. + * + * Revision 1.29 1994/10/04 03:46:23 geoff + * Add a missing carriage return in the help message + * + * Revision 1.28 1994/09/16 05:07:00 geoff + * Add the BAD_FLAG message, and start a sentence in another message with + * an uppercase letter. + * + * Revision 1.27 1994/07/28 05:11:38 geoff + * Log message for previous revision: add BHASH_C_ZERO_COUNT. 
+ * + * Revision 1.26 1994/07/28 04:53:49 geoff + * + * Revision 1.25 1994/05/24 04:54:36 geoff + * Add error messages for affix-flag checking. + * + * Revision 1.24 1994/01/25 07:12:42 geoff + * Get rid of all old RCS log lines in preparation for the 3.1 release. + * + */ + +/* + * The following strings are used in numerous places: + */ +#define BAD_FLAG "\r\nIllegal affix flag character '%c'\r\n" +#define CANT_OPEN "Can't open %s\r\n" +#define CANT_CREATE "Can't create %s\r\n" +#define WORD_TOO_LONG(w) "\r\nWord '%s' too long at line %d of %s, truncated\r\n", \ + w, __LINE__, __FILE__ + +/* + * The following strings are used in buildhash.c: + */ +#define BHASH_C_NO_DICT "No dictionary (%s)\n" +#define BHASH_C_NO_COUNT "No count file\n" +#define BHASH_C_BAD_COUNT "Bad count file\n" +#define BHASH_C_ZERO_COUNT "No words in dictionary\n" + /* I think this message looks better when it's nearly 80 characters wide, + * thus the ugly formatting in the next two defines. GK 9-87 */ +#define BHASH_C_BAFF_1(max, excess) \ + " Warning: this language table may exceed the maximum total affix length\nof %d by up to %d bytes. You should either increase MAXAFFIXLEN in config.X\nor shorten your largest affix/strip string difference. 
(This is the\n", \ + max, excess +#define BHASH_C_BAFF_2 \ + "difference between the affix length and the strip length in a given\nreplacement rule, or the affix length if there is no strip string\nin that rule.)\n" +#define BHASH_C_OVERFLOW "Hash table overflowed by %d words\n" +#define BHASH_C_CANT_OPEN_DICT "Can't open dictionary\n" +#define BHASH_C_NO_SPACE "Couldn't allocate hash table\n" +#define BHASH_C_COLLISION_SPACE "\ncouldn't allocate space for collision\n" +#define BHASH_C_COUNTING "Counting words in dictionary ...\n" +#define BHASH_C_WORD_COUNT "\n%d words\n" +#define BHASH_C_USAGE "Usage: buildhash [-s] dict-file aff-file hash-file\n\tbuildhash -c count aff-file\n" + +/* + * The following strings are used in correct.c: + */ +#define CORR_C_HELP_1 "Whenever a word is found that is not in the dictionary,\r\n" +#define CORR_C_HELP_2 "it is printed on the first line of the screen. If the dictionary\r\n" +#define CORR_C_HELP_3 "contains any similar words, they are listed with a number\r\n" +#define CORR_C_HELP_4 "next to each one. 
You have the option of replacing the word\r\n" +#define CORR_C_HELP_5 "completely, or choosing one of the suggested words.\r\n" + /* You may add HELP_6 through HELP_9 if your language needs more lines */ +#define CORR_C_HELP_6 "" +#define CORR_C_HELP_7 "" +#define CORR_C_HELP_8 "" +#define CORR_C_HELP_9 "" +#define CORR_C_HELP_COMMANDS "\r\nCommands are:\r\n\r\n" +#define CORR_C_HELP_R_CMD "R Replace the misspelled word completely.\r\n" +#define CORR_C_HELP_BLANK "Space Accept the word this time only.\r\n" +#define CORR_C_HELP_A_CMD "A Accept the word for the rest of this session.\r\n" +#define CORR_C_HELP_I_CMD "I Accept the word, and put it in your private dictionary.\r\n" +#define CORR_C_HELP_U_CMD "U Accept and add lowercase version to private dictionary.\r\n" +#define CORR_C_HELP_0_CMD "0-n Replace with one of the suggested words.\r\n" +#define CORR_C_HELP_L_CMD "L Look up words in system dictionary.\r\n" +#define CORR_C_HELP_X_CMD "X Write the rest of this file, ignoring misspellings,\r\n and start next file.\r\n" +#define CORR_C_HELP_Q_CMD "Q Quit immediately. Asks for confirmation.\r\n Leaves file unchanged.\r\n" +#define CORR_C_HELP_BANG "! Shell escape.\r\n" +#define CORR_C_HELP_REDRAW "^L Redraw screen.\r\n" +#define CORR_C_HELP_SUSPEND "^Z Suspend program.\r\n" +#define CORR_C_HELP_HELP "? Show this help screen.\r\n" +#define CORR_C_HELP_TYPE_SPACE "-- Type space to continue --" + +#define CORR_C_FILE_LABEL " File: %s" +#define CORR_C_READONLY "[READONLY]" +#define CORR_C_MINI_MENU "[SP] <number> R)epl A)ccept I)nsert L)ookup U)ncap Q)uit e(X)it or ? for help\r\n" +#define CORR_C_CONFIRM_QUIT "Are you sure you want to throw away your changes? 
" +#define CORR_C_REPLACE_WITH "Replace with: " +#define CORR_C_LOOKUP_PROMPT "Lookup string ('*' is wildcard): " +#define CORR_C_MORE_PROMPT "-- more --" +#define CORR_C_BLANK_MORE "\r \r" +#define CORR_C_END_LOOK "--end--" + +/* + * The following strings are used in defmt.c: + */ +#define DEFMT_C_TEX_MATH_ERROR "****ERROR in parsing TeX math mode!\r\n" +#define DEFMT_C_LR_MATH_ERROR "***ERROR in LR to math-mode switch.\n" + +/* + * The following strings are used in icombine.c: + */ +#define ICOMBINE_C_BAD_TYPE "icombine: unrecognized formatter type '%s'\n" +#define ICOMBINE_C_USAGE "Usage: icombine [-T suffix] [aff-file] < wordlist\n" + +/* + * The following strings are used in ispell.c: + */ +#define ISPELL_C_USAGE1 "Usage: %s [-dfile | -pfile | -wchars | -Wn | -t | -n | -x | -b | -S | -B | -C | -P | -m | -Lcontext | -M | -N | -Ttype | -V] file .....\n" +#define ISPELL_C_USAGE2 " %s [-dfile | -pfile | -wchars | -Wn | -t | -n | -Ttype] -l\n" +#ifndef USG +#define ISPELL_C_USAGE3 " %s [-dfile | -pfile | -ffile | -Wn | -t | -n | -s | -B | -C | -P | -m | -Ttype] {-a | -A}\n" +#else +#define ISPELL_C_USAGE3 " %s [-dfile | -pfile | -ffile | -Wn | -t | -n | -B | -C | -P | -m | -Ttype] {-a | -A}\n" +#endif +#define ISPELL_C_USAGE4 " %s [-dfile] [-wchars | -Wn] -c\n" +#define ISPELL_C_USAGE5 " %s [-dfile] [-wchars] -e[1-4]\n" +#define ISPELL_C_USAGE6 " %s [-dfile] [-wchars] -D\n" +#define ISPELL_C_USAGE7 " %s -v\n" +#define ISPELL_C_TEMP_DISAPPEARED "temporary file disappeared (%s)\r\n" +#define ISPELL_C_BAD_TYPE "ispell: unrecognized formatter type '%s'\n" +#define ISPELL_C_NO_FILE "ispell: specified file does not exist\n" +#define ISPELL_C_NO_FILES "ispell: specified files do not exist\n" +#define ISPELL_C_CANT_WRITE "Warning: Can't write to %s\r\n" +#define ISPELL_C_OPTIONS_ARE "Compiled-in options:\n" + +/* + * The following strings are used in lookup.c: + */ +#define LOOKUP_C_CANT_READ "Trouble reading hash table %s\r\n" +#define LOOKUP_C_NULL_HASH "Null hash table 
%s\r\n" +#define LOOKUP_C_SHORT_HASH(name, gotten, wanted) \ + "Truncated hash table %s: got %d bytes, expected %d\r\n", \ + name, gotten, wanted +#define LOOKUP_C_BAD_MAGIC(name, wanted, gotten) \ + "Illegal format hash table %s - expected magic 0x%x, got 0x%x\r\n", \ + name, wanted, gotten +#define LOOKUP_C_BAD_MAGIC2(name, wanted, gotten) \ + "Illegal format hash table %s - expected magic2 0x%x, got 0x%x\r\n", \ + name, wanted, gotten +#define LOOKUP_C_BAD_OPTIONS(gotopts, gotchars, gotlen, wantedopts, wantedchars, wantedlen) \ + "Hash table options don't agree with buildhash - 0x%x/%d/%d vs. 0x%x/%d/%d\r\n", \ + gotopts, gotchars, gotlen, \ + wantedopts, wantedchars, wantedlen +#define LOOKUP_C_NO_HASH_SPACE "Couldn't allocate space for hash table\r\n" +#define LOOKUP_C_BAD_FORMAT "Illegal format hash table\r\n" +#define LOOKUP_C_NO_LANG_SPACE "Couldn't allocate space for language tables\r\n" + +/* + * The following strings are used in makedent.c: + */ +#define MAKEDENT_C_NO_WORD_SPACE "\r\nCouldn't allocate space for word '%s'\r\n" +#define MAKEDENT_C_BAD_WORD_CHAR "\r\nWord '%s' contains illegal characters\r\n" + +/* + * The following strings are used in parse.y: + */ +#define PARSE_Y_8_BIT "Eighth bit ignored (recompile ispell without NO8BIT)" +#define PARSE_Y_NO_WORD_STRINGS "wordchars statement may not specify string characters" +#define PARSE_Y_UNMATCHED "Unmatched charset lengths" +#define PARSE_Y_NO_BOUNDARY_STRINGS "boundarychars statement may not specify string characters" +#define PARSE_Y_LONG_STRING "String character is too long" +#define PARSE_Y_NULL_STRING "String character must have nonzero length" +#define PARSE_Y_MANY_STRINGS "Too many string characters" +#define PARSE_Y_NO_SUCH_STRING "No such string character" +#define PARSE_Y_MULTIPLE_STRINGS "Alternate string character was already defined" +#define PARSE_Y_LENGTH_MISMATCH "Upper and lower versions of string character must be same length" +#define PARSE_Y_WRONG_NROFF "Incorrect character 
count in nroffchars statement" +#define PARSE_Y_WRONG_TEX "Incorrect character count in TeXchars statement" +#define PARSE_Y_DOUBLE_COMPOUND "Compoundwords option may only appear once" +#define PARSE_Y_LONG_FLAG "Flag must be single character" +#define PARSE_Y_BAD_FLAG "Flag must be alphabetic" +#define PARSE_Y_DUP_FLAG "Duplicate flag" +#define PARSE_Y_NO_SPACE "Out of memory" +#define PARSE_Y_NEED_BLANK "Single characters must be separated by a blank" +#define PARSE_Y_MANY_CONDS "Too many conditions; 8 maximum" +#define PARSE_Y_EOF "Unexpected EOF in quoted string" +#define PARSE_Y_LONG_QUOTE "Quoted string too long, max 256 characters" +#define PARSE_Y_ERROR_FORMAT(file, lineno, error) \ + "%s line %d: %s\n", file, lineno, error +#define PARSE_Y_MALLOC_TROUBLE "yyopen: trouble allocating memory\n" +#define PARSE_Y_UNGRAB_PROBLEM "Internal error: ungrab buffer overflow" +#define PARSE_Y_BAD_DEFORMATTER "Deformatter must be either 'nroff' or 'tex'" +#define PARSE_Y_BAD_NUMBER "Illegal digit in number" + +/* + * The following strings are used in term.c: + */ +#define TERM_C_SMALL_SCREEN "Screen too small: need at least %d lines\n" +#define TERM_C_NO_BATCH "Can't deal with non-interactive use yet.\n" +#define TERM_C_CANT_FORK "Couldn't fork, try later.\r\n" +#define TERM_C_TYPE_SPACE "\n-- Type space to continue --" + +/* + * The following strings are used in tree.c: + */ +#define TREE_C_CANT_UPDATE "Warning: Cannot update personal dictionary (%s)\r\n" +#define TREE_C_NO_SPACE "Ran out of space for personal dictionary\r\n" +#define TREE_C_TRY_ANYWAY "Continuing anyway (with reduced performance).\r\n" + +/* + * The following strings are used in unsq.c: + */ +#define UNSQ_C_BAD_COUNT "Illegal count character 0x%x\n" +#define UNSQ_C_SURPRISE_EOF "Unexpected EOF\n" diff --git a/tdespell2/plugins/ispell/sp_spell.h b/tdespell2/plugins/ispell/sp_spell.h new file mode 100644 index 000000000..9e1d7b903 --- /dev/null +++ b/tdespell2/plugins/ispell/sp_spell.h @@ -0,0 +1,60 @@ 
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* enchant + * Copyright (C) 2003 Dom Lachowicz + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version.* + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */
+
+#ifndef SPELL_H
+#define SPELL_H
+
+/*
+  TODO stuff we need to do for this spell module:
+
+  eliminate all the stderr fprintfs
+  rip out the support for ICHAR_IS_CHAR
+*/
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*
+ * Suggestion list handed to SpellCheckSuggestNWord16().
+ * NOTE(review): presumably `count` is the number of entries in the parallel
+ * `score` and `word` arrays and each `word` entry is a 16-bit string --
+ * confirm against the implementation, which is not part of this header.
+ */
+typedef struct _sp_suggestions {
+    int count;
+    short *score;
+    unsigned short **word;
+} sp_suggestions;
+
+/*
+ * C entry points of the spell module.
+ * NOTE(review): return-value conventions are not visible in this header.
+ * SpellCheckInit presumably takes the name of the hash dictionary to load,
+ * and the *NWord16 calls take a 16-bit character string plus its length --
+ * confirm against the definitions.
+ */
+int SpellCheckInit(char *hashname);
+void SpellCheckCleanup(void);
+int SpellCheckNWord16(const unsigned short *word16, int length);
+int SpellCheckSuggestNWord16(const unsigned short *word16, int length, sp_suggestions *sg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPELL_H */
diff --git a/tdespell2/plugins/ispell/tdespell_ispell.desktop b/tdespell2/plugins/ispell/tdespell_ispell.desktop
new file mode 100644
index 000000000..4b1aebcb7
--- /dev/null
+++ b/tdespell2/plugins/ispell/tdespell_ispell.desktop
@@ -0,0 +1,22 @@
+[Desktop Entry]
+Type=Service
+ServiceTypes=KSpell/Client
+X-TDE-Library=tdespell_ispell
+X-TDE-PluginInfo-Author=Zack Rusin
+X-TDE-PluginInfo-Email=zack@kde.org
+X-TDE-PluginInfo-Name=tdespell_ispell
+X-TDE-PluginInfo-Version=0.0.1
+X-TDE-PluginInfo-Website=http://www.kde.org
+X-TDE-PluginInfo-Category=Clients
+X-TDE-PluginInfo-Depends=
+X-TDE-PluginInfo-License=LGPL
+X-TDE-PluginInfo-EnabledByDefault=true
+Name=ISpell
+Name[bn]=আই-স্পেল
+Name[hi]=आई-स्पैल
+Name[it]=Ispell
+Name[ne]=आई स्पेल
+Name[sv]=Ispell
+Name[ta]=psதேர்ந்தெடு
+Name[te]=ఐస్పెల్
+Name[tg]=psselect
diff --git a/tdespell2/plugins/ispell/tdespell_ispellclient.cpp b/tdespell2/plugins/ispell/tdespell_ispellclient.cpp
new file mode 100644
index 000000000..1f2e9c8ba
--- /dev/null
+++ b/tdespell2/plugins/ispell/tdespell_ispellclient.cpp
@@ -0,0 +1,54 @@
+/*
+ * tdespell_ispellclient.cpp
+ *
+ * Copyright (C) 2003 Zack Rusin <zack@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of
the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ +#include "tdespell_ispellclient.h" + +#include "tdespell_ispelldict.h" +#include "ispell_checker.h" + +#include <kgenericfactory.h> +#include <kdebug.h> + +typedef KGenericFactory<ISpellClient> ISpellClientFactory; +K_EXPORT_COMPONENT_FACTORY( tdespell_ispell, ISpellClientFactory( "tdespell_ispell" ) ) + +using namespace KSpell2; + +ISpellClient::ISpellClient( TQObject *parent, const char *name, const TQStringList& /* args */ ) + : Client( parent, name ) +{ +} + +ISpellClient::~ISpellClient() +{ +} + +Dictionary* ISpellClient::dictionary( const TQString& language ) +{ + ISpellDict *ad = new ISpellDict( language ); + return ad; +} + +TQStringList ISpellClient::languages() const +{ + return ISpellChecker::allDics(); +} + +#include "tdespell_ispellclient.moc" diff --git a/tdespell2/plugins/ispell/tdespell_ispellclient.h b/tdespell2/plugins/ispell/tdespell_ispellclient.h new file mode 100644 index 000000000..06cbe0bab --- /dev/null +++ b/tdespell2/plugins/ispell/tdespell_ispellclient.h @@ -0,0 +1,56 @@ +/* + * tdespell_ispellclient.h + * + * Copyright (C) 2003 Zack Rusin <zack@kde.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ +#ifndef KSPELL_ISPELLCLIENT_H +#define KSPELL_ISPELLCLIENT_H + +#include "client.h" +#include <tqobject.h> + +#include "ispell_checker.h" + +namespace KSpell2 { + class Dictionary; +} +using KSpell2::Dictionary; + +class ISpellClient : public KSpell2::Client +{ + Q_OBJECT +public: + ISpellClient( TQObject *parent, const char *name, const TQStringList & /* args */ ); + ~ISpellClient(); + + virtual int reliability() const { + return 10; + } + + virtual Dictionary* dictionary( const TQString& language ); + + virtual TQStringList languages() const; + + virtual TQString name() const { + return "ISpell"; + } +private: + +}; + +#endif diff --git a/tdespell2/plugins/ispell/tdespell_ispelldict.cpp b/tdespell2/plugins/ispell/tdespell_ispelldict.cpp new file mode 100644 index 000000000..089fabc9d --- /dev/null +++ b/tdespell2/plugins/ispell/tdespell_ispelldict.cpp @@ -0,0 +1,76 @@ +/** + * tdespell_aspelldict.cpp + * + * Copyright (C) 2003 Zack Rusin <zack@kde.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ +#include "tdespell_ispelldict.h" + +#include <kdebug.h> + +#include "ispell_checker.h" + +using namespace KSpell2; + +ISpellDict::ISpellDict( const TQString& lang ) + : Dictionary( lang ) +{ + m_checker = new ISpellChecker(); + + if ( !m_checker->requestDictionary( lang.latin1() ) ) { + kdError()<<"Language \""<< lang << "\" doesn't exist for Ispell"<<endl; + } +} + +ISpellDict::~ISpellDict() +{ +} + +bool ISpellDict::check( const TQString& word ) +{ + return m_checker->checkWord( word ); +} + +TQStringList ISpellDict::suggest( const TQString& word ) +{ + return m_checker->suggestWord( word ); +} + +bool ISpellDict::checkAndSuggest( const TQString& word, + TQStringList& suggestions ) +{ + bool c = check( word ); + if ( c ) + suggestions = suggest( word ); + return c; +} + +bool ISpellDict::storeReplacement( const TQString& , + const TQString& ) +{ + return false; +} + +bool ISpellDict::addToPersonal( const TQString& ) +{ + return false; +} + +bool ISpellDict::addToSession( const TQString& ) +{ + return false; +} diff --git a/tdespell2/plugins/ispell/tdespell_ispelldict.h b/tdespell2/plugins/ispell/tdespell_ispelldict.h new file mode 100644 index 000000000..7513d094c --- /dev/null +++ b/tdespell2/plugins/ispell/tdespell_ispelldict.h @@ -0,0 +1,49 @@ +/** + * tdespell_ispelldict.h + * + * Copyright (C) 2003 Zack Rusin <zack@kde.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ +#ifndef KSPELL_ASPELLDICT_H +#define KSPELL_ASPELLDICT_H + +#include "dictionary.h" + +class ISpellChecker; + +class ISpellDict : public KSpell2::Dictionary +{ +public: + ISpellDict( const TQString& lang ); + ~ISpellDict(); + virtual bool check( const TQString& word ); + + virtual TQStringList suggest( const TQString& word ); + + virtual bool checkAndSuggest( const TQString& word, + TQStringList& suggestions ) ; + + virtual bool storeReplacement( const TQString& bad, + const TQString& good ); + + virtual bool addToPersonal( const TQString& word ); + virtual bool addToSession( const TQString& word ); +private: + ISpellChecker *m_checker; +}; + +#endif diff --git a/tdespell2/plugins/ispell/tgood.cpp b/tdespell2/plugins/ispell/tgood.cpp new file mode 100644 index 000000000..06fbc99ef --- /dev/null +++ b/tdespell2/plugins/ispell/tgood.cpp @@ -0,0 +1,810 @@ +/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ +/* enchant + * Copyright (C) 2003 Dom Lachowicz + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * In addition, as a special exception, Dom Lachowicz + * gives permission to link the code of this program with + * non-LGPL Spelling Provider libraries (eg: a MSFT Office + * spell checker backend) and distribute linked combinations including + * the two. You must obey the GNU Lesser General Public License in all + * respects for all of the code used other than said providers. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. + */ + +/* + * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * 4. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Geoff Kuenning and + * other unpaid contributors. + * 5. The name of Geoff Kuenning may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Table-driven version of good.c. + * + * Geoff Kuenning, July 1987 + */ + +/* + * $Log$ + * Revision 1.1 2004/01/31 16:44:12 zrusin + * ISpell plugin. + * + * Revision 1.4 2003/08/14 17:51:29 dom + * update license - exception clause should be Lesser GPL + * + * Revision 1.3 2003/07/28 20:40:28 dom + * fix up the license clause, further win32-registry proof some directory getting functions + * + * Revision 1.2 2003/07/16 22:52:56 dom + * LGPL + exception license + * + * Revision 1.1 2003/07/15 01:15:09 dom + * ispell enchant backend + * + * Revision 1.2 2003/01/29 05:50:12 hippietrail + * + * Fixed my mess in EncodingManager. + * Changed many C casts to C++ casts. + * + * Revision 1.1 2003/01/24 05:52:36 hippietrail + * + * Refactored ispell code. 
Old ispell global variables had been put into + * an allocated structure, a pointer to which was passed to many functions. + * I have now made all such functions and variables private members of the + * ISpellChecker class. It was C OO, now it's C++ OO. + * + * I've fixed the makefiles and tested compilation but am unable to test + * operation. Please back out my changes if they cause problems which + * are not obvious or easy to fix. + * + * Revision 1.6 2003/01/06 18:48:42 dom + * ispell cleanup, start of using new 'add' save features + * + * Revision 1.5 2002/09/19 05:31:20 hippietrail + * + * More Ispell cleanup. Conditional globals and DEREF macros are removed. + * K&R function declarations removed, converted to Doxygen style comments + * where possible. No code has been changed (I hope). Compiles for me but + * unable to test. + * + * Revision 1.4 2002/09/17 03:03:31 hippietrail + * + * After seeking permission on the developer list I've reformatted all the + * spelling source which seemed to have parts which used 2, 3, 4, and 8 + * spaces for tabs. It should all look good with our standard 4-space + * tabs now. + * I've concentrated just on indentation in the actual code. More prettying + * could be done. + * * NO code changes were made * + * + * Revision 1.3 2002/09/13 17:20:14 mpritchett + * Fix more warnings for Linux build + * + * Revision 1.2 2001/05/12 16:05:42 thomasf + * Big pseudo changes to ispell to make it pass around a structure rather + * than rely on all sorts of gloabals willy nilly here and there. Also + * fixed our spelling class to work with accepting suggestions once more. + * This code is dirty, gross and ugly (not to mention still not supporting + * multiple hash sized just yet) but it works on my machine and will no + * doubt break other machines. + * + * Revision 1.1 2001/04/15 16:01:24 tomas_f + * moving to spell/xp + * + * Revision 1.7 1999/10/20 06:03:56 sterwill + * Changed C++-style comments to C-style comments in C code. 
+ * + * Revision 1.6 1999/10/20 03:19:35 paul + * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more. + * + * Revision 1.5 1999/04/13 17:12:51 jeff + * Applied "Darren O. Benham" <gecko@benham.net> spell check changes. + * Fixed crash on Win32 with the new code. + * + * Revision 1.4 1998/12/29 14:55:33 eric + * + * I've doctored the ispell code pretty extensively here. It is now + * warning-free on Win32. It also *works* on Win32 now, since I + * replaced all the I/O calls with ANSI standard ones. + * + * Revision 1.4 1998/12/29 14:55:33 eric + * + * I've doctored the ispell code pretty extensively here. It is now + * warning-free on Win32. It also *works* on Win32 now, since I + * replaced all the I/O calls with ANSI standard ones. + * + * Revision 1.3 1998/12/28 23:11:30 eric + * + * modified spell code and integration to build on Windows. + * This is still a hack. + * + * Actually, it doesn't yet WORK on Windows. It just builds. + * SpellCheckInit is failing for some reason. + * + * Revision 1.2 1998/12/28 22:16:22 eric + * + * These changes begin to incorporate the spell checker into AbiWord. Most + * of this is a hack. + * + * 1. added other/spell to the -I list in config/abi_defs + * 2. replaced other/spell/Makefile with one which is more like + * our build system. + * 3. added other/spell to other/Makefile so that the build will now + * dive down and build the spell check library. + * 4. added the AbiSpell library to the Makefiles in wp/main + * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp. + * This call is a HACK and should be replaced with something + * proper later. + * 6. added code to fv_View.cpp as follows: + * whenever you double-click on a word, the spell checker + * verifies that word and prints its status to stdout. + * + * Caveats: + * 1. This will break the Windows build. I'm going to work on fixing it + * now. 
+ * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash. + * The dictionary location is currently hard-coded. This will be + * fixed as well. + * + * Anyway, such as it is, it works. + * + * Revision 1.1 1998/12/28 18:04:43 davet + * Spell checker code stripped from ispell. At this point, there are + * two external routines... the Init routine, and a check-a-word routine + * which returns a boolean value, and takes a 16 bit char string. + * The code resembles the ispell code as much as possible still. + * + * Revision 1.32 1994/11/02 06:56:16 geoff + * Remove the anyword feature, which I've decided is a bad idea. + * + * Revision 1.31 1994/10/25 05:46:25 geoff + * Add support for the FF_ANYWORD (affix applies to all words, even if + * flag bit isn't set) flag option. + * + * Revision 1.30 1994/05/24 06:23:08 geoff + * Don't create a hit if "allhits" is clear and capitalization + * mismatches. This cures a bug where a word could be in the dictionary + * and yet not found. + * + * Revision 1.29 1994/05/17 06:44:21 geoff + * Add support for controlled compound formation and the COMPOUNDONLY + * option to affix flags. + * + * Revision 1.28 1994/01/25 07:12:13 geoff + * Get rid of all old RCS log lines in preparation for the 3.1 release. + * + */ + +#include <ctype.h> +#include <stdlib.h> +#include <string.h> + +#include "ispell_checker.h" + +/*! 
+ * Check possible affixes + * + * \param word Word to be checked + * \param ucword Upper-case-only copy of word + * \param len The length of word/ucword + * \param ignoreflagbits Ignore whether affix is legal + * \param allhits Keep going after first hit + * \param pfxopts Options to apply to prefixes + * \param sfxopts Options to apply to suffixes + */ +void ISpellChecker::chk_aff (ichar_t *word, ichar_t *ucword, + int len, int ignoreflagbits, int allhits, int pfxopts, int sfxopts) +{ + register ichar_t * cp; /* Pointer to char to index on */ + struct flagptr * ind; /* Flag index table to test */ + + pfx_list_chk (word, ucword, len, pfxopts, sfxopts, &m_pflagindex[0], + ignoreflagbits, allhits); + cp = ucword; + /* HACK: bail on unrecognized chars */ + if (*cp >= (SET_SIZE + MAXSTRINGCHARS)) + return; + ind = &m_pflagindex[*cp++]; + while (ind->numents == 0 && ind->pu.fp != NULL) + { + if (*cp == 0) + return; + if (ind->pu.fp[0].numents) + { + pfx_list_chk (word, ucword, len, pfxopts, sfxopts, &ind->pu.fp[0], + ignoreflagbits, allhits); + if (m_numhits && !allhits && /* !cflag && */ !ignoreflagbits) + return; + } + /* HACK: bail on unrecognized chars */ + if (*cp >= (SET_SIZE + MAXSTRINGCHARS)) + return; + ind = &ind->pu.fp[*cp++]; + } + pfx_list_chk (word, ucword, len, pfxopts, sfxopts, ind, ignoreflagbits, + allhits); + if (m_numhits && !allhits && /* !cflag &&*/ !ignoreflagbits) + return; + chk_suf (word, ucword, len, sfxopts, static_cast<struct flagent *>(NULL), + ignoreflagbits, allhits); +} + +/*! 
+ * Check some prefix flags + * + * \param word Word to be checked + * \param ucword Upper-case-only word + * \param len The length of ucword + * \param optflags Options to apply + * \param sfxopts Options to apply to suffixes + * \param ind Flag index table + * \param ignoreflagbits Ignore whether affix is legal + * \param allhits Keep going after first hit + * */ +void ISpellChecker::pfx_list_chk (ichar_t *word, ichar_t *ucword, int len, int optflags, + int sfxopts, struct flagptr * ind, int ignoreflagbits, int allhits) +{ + int cond; /* Condition number */ + register ichar_t * cp; /* Pointer into end of ucword */ + struct dent * dent; /* Dictionary entry we found */ + int entcount; /* Number of entries to process */ + register struct flagent * + flent; /* Current table entry */ + int preadd; /* Length added to tword2 as prefix */ + register int tlen; /* Length of tword */ + ichar_t tword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; /* Tmp cpy */ + ichar_t tword2[sizeof tword]; /* 2nd copy for ins_root_cap */ + + for (flent = ind->pu.ent, entcount = ind->numents; + entcount > 0; + flent++, entcount--) + { + /* + * If this is a compound-only affix, ignore it unless we're + * looking for that specific thing. + */ + if ((flent->flagflags & FF_COMPOUNDONLY) != 0 + && (optflags & FF_COMPOUNDONLY) == 0) + continue; + + /* + * See if the prefix matches. + */ + tlen = len - flent->affl; + if (tlen > 0 + && (flent->affl == 0 + || icharncmp (flent->affix, ucword, flent->affl) == 0) + && tlen + flent->stripl >= flent->numconds) + { + /* + * The prefix matches. Remove it, replace it by the "strip" + * string (if any), and check the original conditions. + */ + if (flent->stripl) + icharcpy (tword, flent->strip); + icharcpy (tword + flent->stripl, ucword + flent->affl); + cp = tword; + for (cond = 0; cond < flent->numconds; cond++) + { + if ((flent->conds[*cp++] & (1 << cond)) == 0) + break; + } + if (cond >= flent->numconds) + { + /* + * The conditions match. 
See if the word is in the + * dictionary. + */ + tlen += flent->stripl; + + if (ignoreflagbits) + { + if ((dent = ispell_lookup (tword, 1)) != NULL) + { + cp = tword2; + if (flent->affl) + { + icharcpy (cp, flent->affix); + cp += flent->affl; + *cp++ = '+'; + } + preadd = cp - tword2; + icharcpy (cp, tword); + cp += tlen; + if (flent->stripl) + { + *cp++ = '-'; + icharcpy (cp, flent->strip); + } + } + } + else if ((dent = ispell_lookup (tword, 1)) != NULL + && TSTMASKBIT (dent->mask, flent->flagbit)) + { + if (m_numhits < MAX_HITS) + { + m_hits[m_numhits].dictent = dent; + m_hits[m_numhits].prefix = flent; + m_hits[m_numhits].suffix = NULL; + m_numhits++; + } + if (!allhits) + { +#ifndef NO_CAPITALIZATION_SUPPORT + if (cap_ok (word, &m_hits[0], len)) + return; + m_numhits = 0; +#else /* NO_CAPITALIZATION_SUPPORT */ + return; +#endif /* NO_CAPITALIZATION_SUPPORT */ + } + } + /* + * Handle cross-products. + */ + if (flent->flagflags & FF_CROSSPRODUCT) + chk_suf (word, tword, tlen, sfxopts | FF_CROSSPRODUCT, + flent, ignoreflagbits, allhits); + } + } + } +} + +/*! 
+ * Check possible suffixes + * + * \param word Word to be checked + * \param ucword Upper-case-only word + * \param len The length of ucword + * \param optflags Affix option flags + * \param pfxent Prefix flag entry if cross-prod + * \param ignoreflagbits Ignore whether affix is legal + * \param allhits Keep going after first hit + */ +void +ISpellChecker::chk_suf (ichar_t *word, ichar_t *ucword, + int len, int optflags, struct flagent *pfxent, + int ignoreflagbits, int allhits) +{ + register ichar_t * cp; /* Pointer to char to index on */ + struct flagptr * ind; /* Flag index table to test */ + + suf_list_chk (word, ucword, len, &m_sflagindex[0], optflags, pfxent, + ignoreflagbits, allhits); + cp = ucword + len - 1; + /* HACK: bail on unrecognized chars */ + if (*cp >= (SET_SIZE + MAXSTRINGCHARS)) + return; + ind = &m_sflagindex[*cp]; + while (ind->numents == 0 && ind->pu.fp != NULL) + { + if (cp == ucword) + return; + if (ind->pu.fp[0].numents) + { + suf_list_chk (word, ucword, len, &ind->pu.fp[0], + optflags, pfxent, ignoreflagbits, allhits); + if (m_numhits != 0 && !allhits && /* !cflag && */ !ignoreflagbits) + return; + } + /* HACK: bail on unrecognized chars */ + if (*(cp-1) >= (SET_SIZE + MAXSTRINGCHARS)) + return; + ind = &ind->pu.fp[*--cp]; + } + suf_list_chk (word, ucword, len, ind, optflags, pfxent, + ignoreflagbits, allhits); +} + +/*! 
+ * \param word Word to be checked + * \param ucword Upper-case-only word + * \param len The length of ucword + * \param ind Flag index table + * \param optflags Affix option flags + * \param pfxent Prefix flag entry if crossonly + * \param ignoreflagbits Ignore whether affix is legal + * \pram allhits Keep going after first hit + */ +void ISpellChecker::suf_list_chk (ichar_t *word, ichar_t *ucword, + int len, struct flagptr *ind, int optflags, + struct flagent *pfxent, int ignoreflagbits, int allhits) +{ + register ichar_t * cp; /* Pointer into end of ucword */ + int cond; /* Condition number */ + struct dent * dent; /* Dictionary entry we found */ + int entcount; /* Number of entries to process */ + register struct flagent * + flent; /* Current table entry */ + int preadd; /* Length added to tword2 as prefix */ + register int tlen; /* Length of tword */ + ichar_t tword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; /* Tmp cpy */ + ichar_t tword2[sizeof tword]; /* 2nd copy for ins_root_cap */ + + icharcpy (tword, ucword); + for (flent = ind->pu.ent, entcount = ind->numents; + entcount > 0; + flent++, entcount--) + { + if ((optflags & FF_CROSSPRODUCT) != 0 + && (flent->flagflags & FF_CROSSPRODUCT) == 0) + continue; + /* + * If this is a compound-only affix, ignore it unless we're + * looking for that specific thing. + */ + if ((flent->flagflags & FF_COMPOUNDONLY) != 0 + && (optflags & FF_COMPOUNDONLY) == 0) + continue; + + /* + * See if the suffix matches. + */ + tlen = len - flent->affl; + if (tlen > 0 + && (flent->affl == 0 + || icharcmp (flent->affix, ucword + tlen) == 0) + && tlen + flent->stripl >= flent->numconds) + { + /* + * The suffix matches. Remove it, replace it by the "strip" + * string (if any), and check the original conditions. 
+ */ + icharcpy (tword, ucword); + cp = tword + tlen; + if (flent->stripl) + { + icharcpy (cp, flent->strip); + tlen += flent->stripl; + cp = tword + tlen; + } + else + *cp = '\0'; + for (cond = flent->numconds; --cond >= 0; ) + { + if ((flent->conds[*--cp] & (1 << cond)) == 0) + break; + } + if (cond < 0) + { + /* + * The conditions match. See if the word is in the + * dictionary. + */ + if (ignoreflagbits) + { + if ((dent = ispell_lookup (tword, 1)) != NULL) + { + cp = tword2; + if ((optflags & FF_CROSSPRODUCT) + && pfxent->affl != 0) + { + icharcpy (cp, pfxent->affix); + cp += pfxent->affl; + *cp++ = '+'; + } + preadd = cp - tword2; + icharcpy (cp, tword); + cp += tlen; + if ((optflags & FF_CROSSPRODUCT) + && pfxent->stripl != 0) + { + *cp++ = '-'; + icharcpy (cp, pfxent->strip); + cp += pfxent->stripl; + } + if (flent->stripl) + { + *cp++ = '-'; + icharcpy (cp, flent->strip); + cp += flent->stripl; + } + if (flent->affl) + { + *cp++ = '+'; + icharcpy (cp, flent->affix); + cp += flent->affl; + } + } + } + else if ((dent = ispell_lookup (tword, 1)) != NULL + && TSTMASKBIT (dent->mask, flent->flagbit) + && ((optflags & FF_CROSSPRODUCT) == 0 + || TSTMASKBIT (dent->mask, pfxent->flagbit))) + { + if (m_numhits < MAX_HITS) + { + m_hits[m_numhits].dictent = dent; + m_hits[m_numhits].prefix = pfxent; + m_hits[m_numhits].suffix = flent; + m_numhits++; + } + if (!allhits) + { +#ifndef NO_CAPITALIZATION_SUPPORT + if (cap_ok (word, &m_hits[0], len)) + return; + m_numhits = 0; +#else /* NO_CAPITALIZATION_SUPPORT */ + return; +#endif /* NO_CAPITALIZATION_SUPPORT */ + } + } + } + } + } +} + +/*! 
+ * Expand a dictionary prefix entry + * + * \param croot Char version of rootword + * \param rootword Root word to expand + * \param mask Mask bits to expand on + * \param option Option, see expandmode + * \param extra Extra info to add to line + * + * \return + */ +int ISpellChecker::expand_pre (char *croot, ichar_t *rootword, MASKTYPE mask[], + int option, char *extra) +{ + int entcount; /* No. of entries to process */ + int explength; /* Length of expansions */ + register struct flagent * + flent; /* Current table entry */ + + for (flent = m_pflaglist, entcount = m_numpflags, explength = 0; + entcount > 0; + flent++, entcount--) + { + if (TSTMASKBIT (mask, flent->flagbit)) + explength += + pr_pre_expansion (croot, rootword, flent, mask, option, extra); + } + return explength; +} + +/*! + * Print a prefix expansion + * + * \param croot Char version of rootword + * \param rootword Root word to expand + * \param flent Current table entry + * \param mask Mask bits to expand on + * \param option Option, see expandmode + * \param extra Extra info to add to line + * + * \return + */ +int ISpellChecker::pr_pre_expansion ( char *croot, ichar_t *rootword, + struct flagent *flent, MASKTYPE mask[], int option, + char *extra) +{ + int cond; /* Current condition number */ + register ichar_t * nextc; /* Next case choice */ + int tlen; /* Length of tword */ + ichar_t tword[INPUTWORDLEN + MAXAFFIXLEN]; /* Temp */ + + tlen = icharlen (rootword); + if (flent->numconds > tlen) + return 0; + tlen -= flent->stripl; + if (tlen <= 0) + return 0; + tlen += flent->affl; + for (cond = 0, nextc = rootword; cond < flent->numconds; cond++) + { + if ((flent->conds[mytoupper (*nextc++)] & (1 << cond)) == 0) + return 0; + } + /* + * The conditions are satisfied. Copy the word, add the prefix, + * and make it the proper case. This code is carefully written + * to match that ins_cap and cap_ok. Note that the affix, as + * inserted, is uppercase. 
+ * + * There is a tricky bit here: if the root is capitalized, we + * want a capitalized result. If the root is followcase, however, + * we want to duplicate the case of the first remaining letter + * of the root. In other words, "Loved/U" should generate "Unloved", + * but "LOved/U" should generate "UNLOved" and "lOved/U" should + * produce "unlOved". + */ + if (flent->affl) + { + icharcpy (tword, flent->affix); + nextc = tword + flent->affl; + } + icharcpy (nextc, rootword + flent->stripl); + if (myupper (rootword[0])) + { + /* We must distinguish followcase from capitalized and all-upper */ + for (nextc = rootword + 1; *nextc; nextc++) + { + if (!myupper (*nextc)) + break; + } + if (*nextc) + { + /* It's a followcase or capitalized word. Figure out which. */ + for ( ; *nextc; nextc++) + { + if (myupper (*nextc)) + break; + } + if (*nextc) + { + /* It's followcase. */ + if (!myupper (tword[flent->affl])) + forcelc (tword, flent->affl); + } + else + { + /* It's capitalized */ + forcelc (tword + 1, tlen - 1); + } + } + } + else + { + /* Followcase or all-lower, we don't care which */ + if (!myupper (*nextc)) + forcelc (tword, flent->affl); + } + if (option == 3) + printf ("\n%s", croot); + if (option != 4) + printf (" %s%s", ichartosstr (tword, 1), extra); + if (flent->flagflags & FF_CROSSPRODUCT) + return tlen + + expand_suf (croot, tword, mask, FF_CROSSPRODUCT, option, extra); + else + return tlen; +} + +/*! + * Expand a dictionary suffix entry + * + * \param croot Char version of rootword + * \param rootword Root word to expand + * \param mask Mask bits to expand on + * \param optflags Affix option flags + * \param option Option, see expandmode + * \param extra Extra info to add to line + * + * \return + */ +int ISpellChecker::expand_suf (char *croot, ichar_t *rootword, MASKTYPE mask[], + int optflags, int option, char *extra) +{ + int entcount; /* No. 
of entries to process */ + int explength; /* Length of expansions */ + register struct flagent * + flent; /* Current table entry */ + + for (flent = m_sflaglist, entcount = m_numsflags, explength = 0; + entcount > 0; + flent++, entcount--) + { + if (TSTMASKBIT (mask, flent->flagbit)) + { + if ((optflags & FF_CROSSPRODUCT) == 0 + || (flent->flagflags & FF_CROSSPRODUCT)) + explength += + pr_suf_expansion (croot, rootword, flent, option, extra); + } + } + return explength; +} + +/*! + * Print a suffix expansion + * + * \param croot Char version of rootword + * \param rootword Root word to expand + * \param flent Current table entry + * \param option Option, see expandmode + * \param extra Extra info to add to line + * + * \return + */ +int ISpellChecker::pr_suf_expansion (char *croot, ichar_t *rootword, + struct flagent *flent, int option, char *extra) +{ + int cond; /* Current condition number */ + register ichar_t * nextc; /* Next case choice */ + int tlen; /* Length of tword */ + ichar_t tword[INPUTWORDLEN + MAXAFFIXLEN]; /* Temp */ + + tlen = icharlen (rootword); + cond = flent->numconds; + if (cond > tlen) + return 0; + if (tlen - flent->stripl <= 0) + return 0; + for (nextc = rootword + tlen; --cond >= 0; ) + { + if ((flent->conds[mytoupper (*--nextc)] & (1 << cond)) == 0) + return 0; + } + /* + * The conditions are satisfied. Copy the word, add the suffix, + * and make it match the case of the last remaining character of the + * root. Again, this code carefully matches ins_cap and cap_ok. + */ + icharcpy (tword, rootword); + nextc = tword + tlen - flent->stripl; + if (flent->affl) + { + icharcpy (nextc, flent->affix); + if (!myupper (nextc[-1])) + forcelc (nextc, flent->affl); + } + else + *nextc = 0; + if (option == 3) + printf ("\n%s", croot); + if (option != 4) + printf (" %s%s", ichartosstr (tword, 1), extra); + return tlen + flent->affl - flent->stripl; +} + +/*! 
+ * \param dst Destination to modify + * \param len Length to copy + */ +void ISpellChecker::forcelc (ichar_t *dst, int len) /* Force to lowercase */ +{ + + for ( ; --len >= 0; dst++) + *dst = mytolower (*dst); +} |