diff options
Diffstat (limited to 'kspell2/plugins/ispell/makedent.cpp')
-rw-r--r-- | kspell2/plugins/ispell/makedent.cpp | 972 |
1 files changed, 0 insertions, 972 deletions
diff --git a/kspell2/plugins/ispell/makedent.cpp b/kspell2/plugins/ispell/makedent.cpp deleted file mode 100644 index 9c168dc17..000000000 --- a/kspell2/plugins/ispell/makedent.cpp +++ /dev/null @@ -1,972 +0,0 @@ -/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ -/* enchant - * Copyright (C) 2003 Dom Lachowicz - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - * In addition, as a special exception, Dom Lachowicz - * gives permission to link the code of this program with - * non-LGPL Spelling Provider libraries (eg: a MSFT Office - * spell checker backend) and distribute linked combinations including - * the two. You must obey the GNU Lesser General Public License in all - * respects for all of the code used other than said providers. If you modify - * this file, you may extend this exception to your version of the - * file, but you are not obligated to do so. If you do not wish to - * do so, delete this exception statement from your version. - */ - -/* - * Copyright 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All modifications to the source code must be clearly marked as - * such. Binary redistributions based on modified source code - * must be clearly marked as modified versions in the documentation - * and/or other materials provided with the distribution. - * 4. All advertising materials mentioning features or use of this software - * must display the following acknowledgment: - * This product includes software developed by Geoff Kuenning and - * other unpaid contributors. - * 5. The name of Geoff Kuenning may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Log$ - * Revision 1.2 2004/02/01 04:46:46 zrusin - * Both ispell and aspell plugins are not working properly. We can start switching. - * - * Revision 1.1 2004/01/31 16:44:12 zrusin - * ISpell plugin. - * - * Revision 1.4 2003/08/14 17:51:28 dom - * update license - exception clause should be Lesser GPL - * - * Revision 1.3 2003/07/28 20:40:27 dom - * fix up the license clause, further win32-registry proof some directory getting functions - * - * Revision 1.2 2003/07/16 22:52:49 dom - * LGPL + exception license - * - * Revision 1.1 2003/07/15 01:15:08 dom - * ispell enchant backend - * - * Revision 1.3 2003/02/12 02:10:38 hippietrail - * - * C casts -> C++ casts - * Improved const-correctness due to changing casts - * Fixed some warnings - * - * Revision 1.2 2003/01/29 05:50:12 hippietrail - * - * Fixed my mess in EncodingManager. - * Changed many C casts to C++ casts. - * - * Revision 1.1 2003/01/24 05:52:35 hippietrail - * - * Refactored ispell code. Old ispell global variables had been put into - * an allocated structure, a pointer to which was passed to many functions. - * I have now made all such functions and variables private members of the - * ISpellChecker class. It was C OO, now it's C++ OO. - * - * I've fixed the makefiles and tested compilation but am unable to test - * operation. Please back out my changes if they cause problems which - * are not obvious or easy to fix. - * - * Revision 1.8 2003/01/06 18:48:40 dom - * ispell cleanup, start of using new 'add' save features - * - * Revision 1.7 2003/01/04 19:09:04 dom - * some tidying... bug pissing me off... - * - * Revision 1.6 2002/09/19 05:31:18 hippietrail - * - * More Ispell cleanup. Conditional globals and DEREF macros are removed. - * K&R function declarations removed, converted to Doxygen style comments - * where possible. No code has been changed (I hope). Compiles for me but - * unable to test. - * - * Revision 1.5 2002/09/17 03:03:30 hippietrail - * - * After seeking permission on the developer list I've reformatted all the - * spelling source which seemed to have parts which used 2, 3, 4, and 8 - * spaces for tabs. It should all look good with our standard 4-space - * tabs now. - * I've concentrated just on indentation in the actual code. More prettying - * could be done. - * * NO code changes were made * - * - * Revision 1.4 2002/09/13 17:20:13 mpritchett - * Fix more warnings for Linux build - * - * Revision 1.3 2002/03/22 14:31:57 dom - * fix mg's compile problem - * - * Revision 1.2 2001/05/12 16:05:42 thomasf - * Big pseudo changes to ispell to make it pass around a structure rather - * than rely on all sorts of gloabals willy nilly here and there. Also - * fixed our spelling class to work with accepting suggestions once more. - * This code is dirty, gross and ugly (not to mention still not supporting - * multiple hash sized just yet) but it works on my machine and will no - * doubt break other machines. - * - * Revision 1.1 2001/04/15 16:01:24 tomas_f - * moving to spell/xp - * - * Revision 1.6 1999/12/21 18:46:29 sterwill - * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se> - * - * Revision 1.5 1999/10/20 03:19:35 paul - * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more. - * - * Revision 1.4 1999/04/13 17:12:51 jeff - * Applied "Darren O. Benham" <gecko@benham.net> spell check changes. - * Fixed crash on Win32 with the new code. - * - * Revision 1.3 1998/12/29 14:55:33 eric - * - * I've doctored the ispell code pretty extensively here. It is now - * warning-free on Win32. It also *works* on Win32 now, since I - * replaced all the I/O calls with ANSI standard ones. - * - * Revision 1.3 1998/12/29 14:55:33 eric - * - * I've doctored the ispell code pretty extensively here. It is now - * warning-free on Win32. It also *works* on Win32 now, since I - * replaced all the I/O calls with ANSI standard ones. - * - * Revision 1.2 1998/12/28 23:11:30 eric - * - * modified spell code and integration to build on Windows. - * This is still a hack. - * - * Actually, it doesn't yet WORK on Windows. It just builds. - * SpellCheckInit is failing for some reason. - * - * Revision 1.1 1998/12/28 18:04:43 davet - * Spell checker code stripped from ispell. At this point, there are - * two external routines... the Init routine, and a check-a-word routine - * which returns a boolean value, and takes a 16 bit char string. - * The code resembles the ispell code as much as possible still. - * - * Revision 1.45 1994/12/27 23:08:52 geoff - * Add code to makedent to reject words that contain non-word characters. - * This helps protect people who use ISO 8-bit characters when ispell - * isn't configured for that option. - * - * Revision 1.44 1994/10/25 05:46:20 geoff - * Fix some incorrect declarations in the lint versions of some routines. - * - * Revision 1.43 1994/09/16 03:32:34 geoff - * Issue an error message for bad affix flags - * - * Revision 1.42 1994/02/07 04:23:43 geoff - * Correctly identify the deformatter when changing file types - * - * Revision 1.41 1994/01/25 07:11:55 geoff - * Get rid of all old RCS log lines in preparation for the 3.1 release. - * - */ - -#include <stdlib.h> -#include <string.h> -#include <ctype.h> - -#include "ispell_checker.h" -#include "msgs.h" - -int makedent P ((char * lbuf, int lbuflen, struct dent * ent)); -/*int combinecaps P ((struct dent * hdr, struct dent * newent)); -#ifndef NO_CAPITALIZATION_SUPPORT -static void forcevheader P ((struct dent * hdrp, struct dent * oldp, - struct dent * newp)); -#endif / * NO_CAPITALIZATION_SUPPORT * / -static int combine_two_entries P ((struct dent * hdrp, - struct dent * oldp, struct dent * newp)); -static int acoversb P ((struct dent * enta, struct dent * entb)); -*/ -/*static int issubset P ((struct dent * ent1, struct dent * ent2)); -static void combineaffixes P ((struct dent * ent1, struct dent * ent2));*/ - -void toutent P ((FILE * outfile, struct dent * hent, - int onlykeep)); -/*static void toutword P ((FILE * outfile, char * word, - struct dent * cent)); -static void flagout P ((FILE * outfile, int flag)); -*/ -#ifndef ICHAR_IS_CHAR -ichar_t * icharcpy P ((ichar_t * out, ichar_t * in)); -int icharlen P ((ichar_t * str)); -int icharcmp P ((ichar_t * s1, ichar_t * s2)); -int icharncmp P ((ichar_t * s1, ichar_t * s2, int n)); -#endif /* ICHAR_IS_CHAR */ - -/*static int has_marker;*/ - -/* - * Fill in a directory entry, including setting the capitalization flags, and - * allocate and initialize memory for the d->word field. Returns -1 - * if there was trouble. The input word must be in canonical form. -int makedent (lbuf, lbuflen, d) -This function is not used by AbiWord. I don't know if it'll be needed for -other abi documents - */ - -#ifndef NO_CAPITALIZATION_SUPPORT -/*! -** Classify the capitalization of a sample entry. Returns one of the -** four capitalization codes ANYCASE, ALLCAPS, CAPITALIZED, or FOLLOWCASE. -** -** \param word -** -** \return -*/ -long -ISpellChecker::whatcap (ichar_t *word) -{ - register ichar_t * p; - - for (p = word; *p; p++) - { - if (mylower (*p)) - break; - } - if (*p == '\0') - return ALLCAPS; - else - { - for ( ; *p; p++) - { - if (myupper (*p)) - break; - } - if (*p == '\0') - { - /* - ** No uppercase letters follow the lowercase ones. - ** If there is more than one uppercase letter, it's - ** "followcase". If only the first one is capitalized, - ** it's "capitalize". If there are no capitals - ** at all, it's ANYCASE. - */ - if (myupper (word[0])) - { - for (p = word + 1; *p != '\0'; p++) - { - if (myupper (*p)) - return FOLLOWCASE; - } - return CAPITALIZED; - } - else - return ANYCASE; - } - else - return FOLLOWCASE; /* .../lower/upper */ - } -} - -/*! -** Add a variant-capitalization header to a word. This routine may be -** called even for a followcase word that doesn't yet have a header. -** -** \param dp Entry to update -** -** \return 0 if all was ok, -1 if allocation error. -*/ -int ISpellChecker::addvheader ( struct dent *dp) -{ - register struct dent * tdent; /* Copy of entry */ - - /* - ** Add a second entry with the correct capitalization, and then make - ** dp into a special dummy entry. - */ - tdent = static_cast<struct dent *>(malloc(sizeof (struct dent))); - if (tdent == NULL) - { - fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word); - return -1; - } - *tdent = *dp; - if (captype (tdent->flagfield) != FOLLOWCASE) - tdent->word = NULL; - else - { - /* Followcase words need a copy of the capitalization */ - tdent->word = static_cast<char *>(malloc (static_cast<unsigned int>(strlen(tdent->word)) + 1)); - if (tdent->word == NULL) - { - fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word); - free (reinterpret_cast<char *>(tdent)); - return -1; - } - strcpy (tdent->word, dp->word); - } - chupcase (dp->word); - dp->next = tdent; - dp->flagfield &= ~CAPTYPEMASK; - dp->flagfield |= (ALLCAPS | MOREVARIANTS); - return 0; -} -#endif /* NO_CAPITALIZATION_SUPPORT */ - -/* -** Combine and resolve the entries describing two capitalizations of the same -** word. This may require allocating yet more entries. -** -** Hdrp is a pointer into a hash table. If the word covered by hdrp has -** variations, hdrp must point to the header. Newp is a pointer to temporary -** storage, and space is malloc'ed if newp is to be kept. The newp->word -** field must have been allocated with mymalloc, so that this routine may free -** the space if it keeps newp but not the word. -** -** Return value: 0 if the word was added, 1 if the word was combined -** with an existing entry, and -1 if trouble occurred (e.g., malloc). -** If 1 is returned, newp->word may have been be freed using myfree. -** -** Life is made much more difficult by the KEEP flag's possibilities. We -** must ensure that a !KEEP word doesn't find its way into the personal -** dictionary as a result of this routine's actions. However, a !KEEP -** word that has affixes must have come from the main dictionary, so it -** is acceptable to combine entries in that case (got that?). -** -** The net result of all this is a set of rules that is a bloody pain -** to figure out. Basically, we want to choose one of the following actions: -** -** (1) Add newp's affixes and KEEP flag to oldp, and discard newp. -** (2) Add oldp's affixes and KEEP flag to newp, replace oldp with -** newp, and discard newp. -#ifndef NO_CAPITALIZATION_SUPPORT -** (3) Insert newp as a new entry in the variants list. If there is -** currently no variant header, this requires adding one. Adding a -** header splits into two sub-cases: -** -** (3a) If oldp is ALLCAPS and the KEEP flags match, just turn it -** into the header. -** (3b) Otherwise, add a new entry to serve as the header. -** To ease list linking, this is done by copying oldp into -** the new entry, and then performing (3a). -** -** After newp has been added as a variant, its affixes and KEEP -** flag are OR-ed into the variant header. -#endif -** -** So how to choose which? The default is always case (3), which adds newp -** as a new entry in the variants list. Cases (1) and (2) are symmetrical -** except for which entry is discarded. We can use case (1) or (2) whenever -** one entry "covers" the other. "Covering" is defined as follows: -** -** (4) For entries with matching capitalization types, A covers B -** if: -** -** (4a) B's affix flags are a subset of A's, or the KEEP flags -** match, and -** (4b) either the KEEP flags match, or A's KEEP flag is set. -** (Since A has more suffixes, combining B with it won't -** cause any extra suffixes to be added to the dictionary.) -** (4c) If the words are FOLLOWCASE, the capitalizations match -** exactly. -** -#ifndef NO_CAPITALIZATION_SUPPORT -** (5) For entries with mismatched capitalization types, A covers B -** if (4a) and (4b) are true, and: -** -** (5a) B is ALLCAPS, or -** (5b) A is ANYCASE, and B is CAPITALIZED. -#endif -** -** For any "hdrp" without variants, oldp is the same as hdrp. Otherwise, -** the above tests are applied using each variant in turn for oldp. -int combinecaps (hdrp, newp) -static void forcevheader (hdrp, oldp, newp) -static int combine_two_entries (hdrp, oldp, newp) -static int acoversb (enta, entb) -*/ - -/* - * \param s - */ -void -ISpellChecker::upcase (ichar_t *s) -{ - - while (*s) - { - *s = mytoupper (*s); - s++; - } -} - -/* - * \param s - */ -void -ISpellChecker::lowcase (ichar_t *s) -{ - - while (*s) - { - *s = mytolower (*s); - s++; - } -} - -/*! - * Upcase variant that works on normal strings. Note that it is a lot - * slower than the normal upcase. The input must be in canonical form. - * - * \param s - */ -void -ISpellChecker::chupcase (char *s) -{ - ichar_t * is; - - is = strtosichar (s, 1); - upcase (is); - ichartostr (s, is, strlen (s) + 1, 1); -} - -/* -** See if one affix field is a subset of another. Returns NZ if ent1 -** is a subset of ent2. The KEEP flag is not taken into consideration. -static int issubset (ent1, ent2) -static void combineaffixes (ent1, ent2) -*/ - -/* -** Write out a dictionary entry, including capitalization variants. -** If onlykeep is true, only those variants with KEEP set will be -** written. -Removed -- not used by Abiword -void toutent_ (toutfile, hent, onlykeep) -static void toutword (toutfile, word, cent) -static void flagout (toutfile, flag) -*/ - -/*! - * If the string under the given pointer begins with a string character, - * return the length of that "character". If not, return 0. - * May be called any time, but it's best if "isstrstart" is first - * used to filter out unnecessary calls. - * - * As a side effect, "laststringch" is set to the number of the string - * found, or to -1 if none was found. This can be useful for such things - * as case conversion. - * - * \param bufp - * \param canonical NZ if input is in canonical form - * - * \return - */ -int -ISpellChecker::stringcharlen (char *bufp, int canonical) -{ -#ifdef SLOWMULTIPLY - static char * sp[MAXSTRINGCHARS]; - static int inited = 0; -#endif /* SLOWMULTIPLY */ - register char * bufcur; - register char * stringcur; - register int stringno; - register int lowstringno; - register int highstringno; - int dupwanted; - -#ifdef SLOWMULTIPLY - if (!inited) - { - inited = 1; - for (stringno = 0; stringno < MAXSTRINGCHARS; stringno++) - sp[stringno] = &hashheader.stringchars[stringno][0]; - } -#endif /* SLOWMULTIPLY */ - lowstringno = 0; - highstringno = m_hashheader.nstrchars - 1; - dupwanted = canonical ? 0 : m_defdupchar; - while (lowstringno <= highstringno) - { - stringno = (lowstringno + highstringno) >> 1; -#ifdef SLOWMULTIPLY - stringcur = sp[stringno]; -#else /* SLOWMULTIPLY */ - stringcur = &m_hashheader.stringchars[stringno][0]; -#endif /* SLOWMULTIPLY */ - bufcur = bufp; - while (*stringcur) - { -#ifdef NO8BIT - if (((*bufcur++ ^ *stringcur) & 0x7F) != 0) -#else /* NO8BIT */ - if (*bufcur++ != *stringcur) -#endif /* NO8BIT */ - break; - /* - ** We can't use autoincrement above because of the - ** test below. - */ - stringcur++; - } - if (*stringcur == '\0') - { - if (m_hashheader.dupnos[stringno] == dupwanted) - { - /* We have a match */ - m_laststringch = m_hashheader.stringdups[stringno]; -#ifdef SLOWMULTIPLY - return stringcur - sp[stringno]; -#else /* SLOWMULTIPLY */ - return stringcur - &m_hashheader.stringchars[stringno][0]; -#endif /* SLOWMULTIPLY */ - } - else - --stringcur; - } - /* No match - choose which side to search on */ -#ifdef NO8BIT - if ((*--bufcur & 0x7F) < (*stringcur & 0x7F)) - highstringno = stringno - 1; - else if ((*bufcur & 0x7F) > (*stringcur & 0x7F)) - lowstringno = stringno + 1; -#else /* NO8BIT */ - if (*--bufcur < *stringcur) - highstringno = stringno - 1; - else if (*bufcur > *stringcur) - lowstringno = stringno + 1; -#endif /* NO8BIT */ - else if (dupwanted < m_hashheader.dupnos[stringno]) - highstringno = stringno - 1; - else - lowstringno = stringno + 1; - } - m_laststringch = static_cast<unsigned int>(-1); - return 0; /* Not a string character */ -} - -/* MACROS CONVERTED TO FUNCTIONS -** These macros are similar to the ones above, but they take into account -** the possibility of string characters. Note well that they take a POINTER, -** not a character. -** -** The "l_" versions set "len" to the length of the string character as a -** handy side effect. (Note that the global "laststringch" is also set, -** and sometimes used, by these macros.) -** -** The "l1_" versions go one step further and guarantee that the "len" -** field is valid for *all* characters, being set to 1 even if the macro -** returns false. This macro is a great example of how NOT to write -** readable C. -*/ -#define isstringch(ptr, canon) (isstringstart (*(ptr)) \ - && stringcharlen ((ptr), (canon)) > 0) -/* -int isstringch(char *ptr, int canon) { - return (isstringstart (*(ptr)) && (len = stringcharlen ((ptr), (canon))) > 0); -} -*/ - -#define l_isstringch(ptr, len, canon) \ - (isstringstart (*(ptr)) \ - && (len = stringcharlen ((ptr), (canon))) \ - > 0) -/* -int l_isstringch(char *ptr, int len, int canon) { - return (isstringstart (*(ptr)) && (len = stringcharlen ((ptr), (canon))) > 0); -} -*/ - -#define l1_isstringch(ptr, len, canon) \ - (len = 1, \ - isstringstart ((unsigned char)(*(ptr))) \ - && ((len = \ - stringcharlen ((ptr), (canon))) \ - > 0 \ - ? 1 : (len = 1, 0))) -/* -int l1_isstringch(char *ptr, int len, int canon) { - return (len = 1, isstringstart ((unsigned char)(*(ptr))) && - ((len = stringcharlen ((ptr), (canon))) > 0 ? 1 : (len = 1, 0))); -} -*/ - -/*** END MACRO CONVERSION ***/ - -/*! - * Convert an external string to an ichar_t string. If necessary, the parity - * bit is stripped off as part of the process. - * - * \param out Where to put result - * \param in String to convert - * \param outlen Size of output buffer, *BYTES* - * \param canonical NZ if input is in canonical form - * - * \return NZ if the output string overflowed. - */ -int -ISpellChecker::strtoichar (ichar_t *out, char *in, int outlen, int canonical) -{ - register int len = 1; /* Length of next character */ - - outlen /= sizeof (ichar_t); /* Convert to an ichar_t count */ - for ( ; --outlen > 0 && *in != '\0'; in += len) - { - if (l1_isstringch (in, len , canonical)) { - *out++ = SET_SIZE + m_laststringch; - } else { - *out++ = (unsigned char)( *in ); - } - } - *out = 0; - return outlen <= 0; -} - -/*! - * Convert an ichar_t string to an external string. - * - * WARNING: the resulting string may wind up being longer than the - * original. In fact, even the sequence strtoichar->ichartostr may - * produce a result longer than the original, because the output form - * may use a different string type set than the original input form. - * - * \param out Where to put result - * \param in String to convert - * \param outlen Size of output buffer, bytes - * \param canonical NZ for canonical form - * - * \return NZ if the output string overflowed. - */ -int -ISpellChecker::ichartostr ( char *out, ichar_t *in, int outlen, int canonical) -{ - register int ch; /* Next character to store */ - register int i; /* Index into duplicates list */ - register char * scharp; /* Pointer into a string char */ - - while (--outlen > 0 && (ch = *in++) != 0) - { - if (ch < SET_SIZE) - *out++ = static_cast<char>(ch); - else - { - ch -= SET_SIZE; - if (!canonical) - { - for (i = m_hashheader.nstrchars; --i >= 0; ) - { - if (m_hashheader.dupnos[i] == m_defdupchar - && (static_cast<int>(m_hashheader.stringdups[i])) == ch) - { - ch = i; - break; - } - } - } - scharp = m_hashheader.stringchars[static_cast<unsigned>(ch)]; - while ((*out++ = *scharp++) != '\0') - ; - out--; - } - } - *out = '\0'; - return outlen <= 0; -} - -/*! - * Convert a string to an ichar_t, storing the result in a static area. - * - * \param in String to convert - * \param canonical NZ if input is in canonical form - * - * \return - */ -ichar_t * -ISpellChecker::strtosichar ( char *in, int canonical) -{ - static ichar_t out[STRTOSICHAR_SIZE / sizeof (ichar_t)]; - - if (strtoichar (out, in, sizeof out, canonical)) - fprintf (stderr, WORD_TOO_LONG (in)); - return out; -} - -/*! - * Convert an ichar_t to a string, storing the result in a static area. - * - * \param in Internal string to convert - * \param canonical NZ for canonical conversion - * - * \return - */ -char * -ISpellChecker::ichartosstr (ichar_t *in, int canonical) -{ - static char out[ICHARTOSSTR_SIZE]; - - if (ichartostr (out, in, sizeof out, canonical)) - fprintf (stderr, WORD_TOO_LONG (out)); - return out; -} - -/*! - * Convert a single ichar to a printable string, storing the result in - * a static area. - * - * \param in - * - * \return - */ -char * -ISpellChecker::printichar (int in) -{ - static char out[MAXSTRINGCHARLEN + 1]; - - if (in < SET_SIZE) - { - out[0] = static_cast<char>(in); - out[1] = '\0'; - } - else - strcpy (out, m_hashheader.stringchars[static_cast<unsigned>(in) - SET_SIZE]); - return out; -} - -#ifndef ICHAR_IS_CHAR -/*! - * Copy an ichar_t. - * - * \param out Destination - * \param in Source - * - * \return - */ -ichar_t * -icharcpy (ichar_t *out, ichar_t *in) -{ - ichar_t * origout; /* Copy of destination for return */ - - origout = out; - while ((*out++ = *in++) != 0) - ; - return origout; -} - -/*! - * Return the length of an ichar_t. - * - * \param in String to count - * - * \return - */ -int -icharlen (ichar_t * in) -{ - register int len; /* Length so far */ - - for (len = 0; *in++ != 0; len++) - ; - return len; -} - -/*! - * Compare two ichar_t's. - * - * \param s1 - * \param s2 - * - * \return - */ -int -icharcmp (ichar_t * s1, ichar_t * s2) -{ - - while (*s1 != 0) - { - if (*s1++ != *s2++) - return *--s1 - *--s2; - } - return *s1 - *s2; -} - -/*! - * Strncmp for two ichar_t's. - * - * \param s1 - * \param s2 - * \param n - * - * \return - */ -int -icharncmp (ichar_t *s1, ichar_t *s2, int n) -{ - - while (--n >= 0 && *s1 != 0) - { - if (*s1++ != *s2++) - return *--s1 - *--s2; - } - if (n < 0) - return 0; - else - return *s1 - *s2; -} - -#endif /* ICHAR_IS_CHAR */ - -/* - * \param istate - * \param name - * \param searchnames - * \param deformatter - * - * \return - */ -int -ISpellChecker::findfiletype (const char *name, int searchnames, int *deformatter) -{ - char * cp; /* Pointer into suffix list */ - int cplen; /* Length of current suffix */ - register int i; /* Index into type table */ - int len; /* Length of the name */ - - /* - * Note: for now, the deformatter is set to 1 for tex, 0 for nroff. - * Further, we assume that it's one or the other, so that a test - * for tex is sufficient. This needs to be generalized. - */ - len = strlen (name); - if (searchnames) - { - for (i = 0; i < m_hashheader.nstrchartype; i++) - { - if (strcmp (name, m_chartypes[i].name) == 0) - { - if (deformatter != NULL) - *deformatter = - (strcmp (m_chartypes[i].deformatter, "tex") == 0); - return i; - } - } - } - for (i = 0; i < m_hashheader.nstrchartype; i++) - { - for (cp = m_chartypes[i].suffixes; *cp != '\0'; cp += cplen + 1) - { - cplen = strlen (cp); - if (len >= cplen && strcmp (&name[len - cplen], cp) == 0) - { - if (deformatter != NULL) - *deformatter = - (strcmp (m_chartypes[i].deformatter, "tex") == 0); - return i; - } - } - } - return -1; -} - -/* - HACK: macros replaced with function implementations - so we could do a side-effect-free check for unicode - characters which aren't in hashheader - - TODO: this is just a workaround to keep us from crashing. - more sophisticated logic needed here. -*/ -char ISpellChecker::myupper(ichar_t c) -{ - if (c < (SET_SIZE + MAXSTRINGCHARS)) - return m_hashheader.upperchars[c]; - else - return 0; -} - -char ISpellChecker::mylower(ichar_t c) -{ - if (c < (SET_SIZE + MAXSTRINGCHARS)) - return m_hashheader.lowerchars[c]; - else - return 0; -} - -int myspace(ichar_t c) -{ - return ((c > 0) && (c < 0x80) && isspace(static_cast<unsigned char>(c))); -} - -char ISpellChecker::iswordch(ichar_t c) -{ - if (c < (SET_SIZE + MAXSTRINGCHARS)) - return m_hashheader.wordchars[c]; - else - return 0; -} - -char ISpellChecker::isboundarych(ichar_t c) -{ - if (c < (SET_SIZE + MAXSTRINGCHARS)) - return m_hashheader.boundarychars[c]; - else - return 0; -} - -char ISpellChecker::isstringstart(ichar_t c) -{ - if (c < (SET_SIZE)) - return m_hashheader.stringstarts[static_cast<unsigned char>(c)]; - else - return 0; -} - -ichar_t ISpellChecker::mytolower(ichar_t c) -{ - if (c < (SET_SIZE + MAXSTRINGCHARS)) - return m_hashheader.lowerconv[c]; - else - return c; -} - -ichar_t ISpellChecker::mytoupper (ichar_t c) -{ - if (c < (SET_SIZE + MAXSTRINGCHARS)) - return m_hashheader.upperconv[c]; - else - return c; -} - |