diff options
Diffstat (limited to 'tqtinterface/qt4/src/codecs/tqtextcodec.cpp')
-rw-r--r-- | tqtinterface/qt4/src/codecs/tqtextcodec.cpp | 3122 |
1 files changed, 0 insertions, 3122 deletions
diff --git a/tqtinterface/qt4/src/codecs/tqtextcodec.cpp b/tqtinterface/qt4/src/codecs/tqtextcodec.cpp deleted file mode 100644 index 8784e4d..0000000 --- a/tqtinterface/qt4/src/codecs/tqtextcodec.cpp +++ /dev/null @@ -1,3122 +0,0 @@ -/**************************************************************************** -** -** Implementation of TQTextCodec class -** -** Created : 981015 -** -** Copyright (C) 1998-2008 Trolltech ASA. All rights reserved. -** -** This file is part of the tools module of the TQt GUI Toolkit. -** -** This file may be used under the terms of the GNU General -** Public License versions 2.0 or 3.0 as published by the Free -** Software Foundation and appearing in the files LICENSE.GPL2 -** and LICENSE.GPL3 included in the packaging of this file. -** Alternatively you may (at your option) use any later version -** of the GNU General Public License if such license has been -** publicly approved by Trolltech ASA (or its successors, if any) -** and the KDE Free TQt Foundation. -** -** Please review the following information to ensure GNU General -** Public Licensing requirements will be met: -** http://trolltech.com/products/qt/licenses/licensing/opensource/. -** If you are unsure which license is appropriate for your use, please -** review the following information: -** http://trolltech.com/products/qt/licenses/licensing/licensingoverview -** or contact the sales department at sales@trolltech.com. -** -** This file may be used under the terms of the Q Public License as -** defined by Trolltech ASA and appearing in the file LICENSE.TQPL -** included in the packaging of this file. Licensees holding valid TQt -** Commercial licenses may use this file in accordance with the TQt -** Commercial License Agreement provided with the Software. -** -** This file is provided "AS IS" with NO WARRANTY OF ANY KIND, -** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR -** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted -** herein. 
-** -**********************************************************************/ - -#include "tqplatformdefs.h" - -// UNIX Large File Support redefines open -> open64 -#if defined(open) -# undef open -#endif - -#include "tqtextcodec.h" -#ifndef TQT_NO_TEXTCODEC - -#include "tqvaluelist.h" -#include "tqtextcodecfactory.h" -#include "tqutfcodec.h" -#include "tqnamespace.h" -#ifndef TQT_NO_CODECS -#include "tqrtlcodec.h" -#include "tqtsciicodec.h" -#include "tqisciicodec_p.h" -#endif // TQT_NO_CODECS -#ifndef TQT_NO_BIG_CODECS -#include "tqbig5codec.h" -#include "tqeucjpcodec.h" -#include "tqeuckrcodec.h" -#include "tqgb18030codec.h" -#include "tqjiscodec.h" -#include "tqjpunicode.h" -#include "tqsjiscodec.h" -#endif // TQT_NO_BIG_CODECS -#include "tqfile.h" -#include "tqstrlist.h" -#include "tqstring.h" -#include "../tools/tqlocale_p.h" - -#if !defined(TQT_NO_CODECS) && !defined(TQT_NO_BIG_CODECS) && defined(TQ_WS_X11) -# include "tqfontcodecs_p.h" -#endif - -#ifdef TQT_THREAD_SUPPORT -# include <private/tqmutexpool_p.h> -#endif // TQT_THREAD_SUPPORT - -#include <stdlib.h> -#include <ctype.h> -#ifndef TQ_OS_TEMP -#include <locale.h> -#endif -#if defined(_XOPEN_UNIX) && !defined(TQ_OS_TQNX6) -#include <langinfo.h> -#endif - -static TQValueList<TQTextCodec*> *all = 0; -static bool destroying_is_ok; // starts out as 0 -static TQTextCodec * localeMapper = 0; - -class TQTextCodecCleanup { -public: - ~TQTextCodecCleanup() { - TQTextCodec::deleteAllCodecs(); - } -}; -static TQTextCodecCleanup qtextcodec_cleanup; - -/*! - Deletes all the created codecs. - - \warning Do not call this function. - - TQApplication calls this function just before exiting to delete - any TQTextCodec objects that may be lying around. Since various - other classes hold pointers to TQTextCodec objects, it is not safe - to call this function earlier. 
- - If you are using the utility classes (like TQString) but not using - TQApplication, calling this function at the very end of your - application may be helpful for chasing down memory leaks by - eliminating any TQTextCodec objects. -*/ - -void TQTextCodec::deleteAllCodecs() -{ - if ( !all ) - return; - -#ifdef TQT_THREAD_SUPPORT - TQMutexLocker locker( tqt_global_mutexpool ? - tqt_global_mutexpool->get( &all ) : 0 ); - if ( !all ) - return; -#endif // TQT_THREAD_SUPPORT - - destroying_is_ok = TRUE; - - TQValueList<TQTextCodec*> *ball = all; - all = 0; - TQValueList<TQTextCodec*>::Iterator it; - for ( it = ball->begin(); it != ball->end(); ++it ) { - delete *it; - *it = 0; - } - ball->clear(); - delete ball; - - destroying_is_ok = FALSE; -} - - -static void realSetup(); - - -static inline void setup() -{ - if ( all ) return; - -#ifdef TQT_THREAD_SUPPORT - TQMutexLocker locker( tqt_global_mutexpool ? - tqt_global_mutexpool->get( &all ) : 0 ); - if ( all ) return; -#endif // TQT_THREAD_SUPPORT - - realSetup(); -} - - -class TQTextStatelessEncoder: public TQTextEncoder { - const TQTextCodec* codec; -public: - TQTextStatelessEncoder(const TQTextCodec*); - TQCString fromUnicode(const TQString& uc, int& lenInOut); -}; - - -class TQTextStatelessDecoder : public TQTextDecoder { - const TQTextCodec* codec; -public: - TQTextStatelessDecoder(const TQTextCodec*); - TQString toUnicode(const char* chars, int len); -}; - -TQTextStatelessEncoder::TQTextStatelessEncoder(const TQTextCodec* c) : - codec(c) -{ -} - - -TQCString TQTextStatelessEncoder::fromUnicode(const TQString& uc, int& lenInOut) -{ - return codec->fromUnicode(uc,lenInOut); -} - - -TQTextStatelessDecoder::TQTextStatelessDecoder(const TQTextCodec* c) : - codec(c) -{ -} - - -TQString TQTextStatelessDecoder::toUnicode(const char* chars, int len) -{ - return codec->toUnicode(chars,len); -} - - - -/*! - \class TQTextCodec tqtextcodec.h - \brief The TQTextCodec class provides conversion between text encodings. 
- \reentrant - \ingroup i18n - - TQt uses Unicode to store, draw and manipulate strings. In many - situations you may wish to deal with data that uses a different - encoding. For example, most Japanese documents are still stored in - Shift-JIS or ISO2022, while Russian users often have their - documents in KOI8-R or CP1251. - - TQt provides a set of TQTextCodec classes to help with converting - non-Unicode formats to and from Unicode. You can also create your - own codec classes (\link #subclassing see later\endlink). - - The supported encodings are: - \list - \i Latin1 - \i Big5 -- Chinese - \i Big5-HKSCS -- Chinese - \i eucJP -- Japanese - \i eucKR -- Korean - \i GB2312 -- Chinese - \i GBK -- Chinese - \i GB18030 -- Chinese - \i JIS7 -- Japanese - \i Shift-JIS -- Japanese - \i TSCII -- Tamil - \i utf8 -- Unicode, 8-bit - \i utf16 -- Unicode - \i KOI8-R -- Russian - \i KOI8-U -- Ukrainian - \i ISO8859-1 -- Western - \i ISO8859-2 -- Central European - \i ISO8859-3 -- Central European - \i ISO8859-4 -- Baltic - \i ISO8859-5 -- Cyrillic - \i ISO8859-6 -- Arabic - \i ISO8859-7 -- Greek - \i ISO8859-8 -- Hebrew, visually ordered - \i ISO8859-8-i -- Hebrew, logically ordered - \i ISO8859-9 -- Turkish - \i ISO8859-10 - \i ISO8859-13 - \i ISO8859-14 - \i ISO8859-15 -- Western - \i IBM 850 - \i IBM 866 - \i CP874 - \i CP1250 -- Central European - \i CP1251 -- Cyrillic - \i CP1252 -- Western - \i CP1253 -- Greek - \i CP1254 -- Turkish - \i CP1255 -- Hebrew - \i CP1256 -- Arabic - \i CP1257 -- Baltic - \i CP1258 - \i Apple Roman - \i TIS-620 -- Thai - \endlist - - TQTextCodecs can be used as follows to convert some locally encoded - string to Unicode. Suppose you have some string encoded in Russian - KOI8-R encoding, and want to convert it to Unicode. 
The simple way - to do this is: - - \code - TQCString locallyEncoded = "..."; // text to convert - TQTextCodec *codec = TQTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R - TQString tqunicodeString = codec->toUnicode( locallyEncoded ); - \endcode - - After this, \c{tqunicodeString} holds the text converted to Unicode. - Converting a string from Unicode to the local encoding is just as - easy: - - \code - TQString tqunicodeString = "..."; // any Unicode text - TQTextCodec *codec = TQTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R - TQCString locallyEncoded = codec->fromUnicode( tqunicodeString ); - \endcode - - Some care must be taken when trying to convert the data in chunks, - for example, when receiving it over a network. In such cases it is - possible that a multi-byte character will be split over two - chunks. At best this might result in the loss of a character and - at worst cause the entire conversion to fail. - - The approach to use in these situations is to create a TQTextDecoder - object for the codec and use this TQTextDecoder for the whole - decoding process, as shown below: - - \code - TQTextCodec *codec = TQTextCodec::codecForName( "Shift-JIS" ); - TQTextDecoder *decoder = codec->makeDecoder(); - - TQString tqunicodeString; - while( receiving_data ) { - TQByteArray chunk = new_data; - tqunicodeString += decoder->toUnicode( chunk.data(), chunk.length() ); - } - \endcode - - The TQTextDecoder object maintains state between chunks and therefore - works correctly even if a multi-byte character is split between - chunks. - - \target subclassing - \section1 Creating your own Codec class - - Support for new text encodings can be added to TQt by creating - TQTextCodec subclasses. - - Built-in codecs can be overridden by custom codecs since more - recently created TQTextCodec objects take precedence over earlier - ones. 
- - You may find it more convenient to make your codec class available - as a plugin; see the \link plugins-howto.html plugin - documentation\endlink for more details. - - The abstract virtual functions describe the encoder to the - system and the coder is used as required in the different - text file formats supported by TQTextStream, and under X11, for the - locale-specific character input and output. - - To add support for another 8-bit encoding to TQt, make a subclass - of TQTextCodec and implement at least the following methods: - - \code - const char* name() const - \endcode - Return the official name for the encoding. - - \code - int mibEnum() const - \endcode - Return the MIB enum for the encoding if it is listed in the - \link http://www.iana.org/assignments/character-sets - IANA character-sets encoding file\endlink. - - If the encoding is multi-byte then it will have "state"; that is, - the interpretation of some bytes will be dependent on some preceding - bytes. For such encodings, you must implement: - - \code - TQTextDecoder* makeDecoder() const - \endcode - Return a TQTextDecoder that remembers incomplete multi-byte sequence - prefixes or other required state. - - If the encoding does \e not require state, you should implement: - - \code - TQString toUnicode(const char* chars, int len) const - \endcode - Converts \e len characters from \e chars to Unicode. - - The base TQTextCodec class has default implementations of the above - two functions, \e{but they are mutually recursive}, so you must - re-implement at least one of them, or both for improved efficiency. - - For conversion from Unicode to 8-bit encodings, it is rarely necessary - to maintain state. However, two functions similar to the two above - are used for encoding: - - \code - TQTextEncoder* makeEncoder() const - \endcode - Return a TQTextEncoder. 
- - \code - TQCString fromUnicode(const TQString& uc, int& lenInOut ) const - \endcode - Converts \e lenInOut characters (of type TQChar) from the start of - the string \e uc, returning a TQCString result, and also returning - the \link TQCString::length() length\endlink of the result in - \e lenInOut. - - Again, these are mutually recursive so only one needs to be implemented, - or both if greater efficiency is possible. - - Finally, you must implement: - - \code - int heuristicContentMatch(const char* chars, int len) const - \endcode - Gives a value indicating how likely it is that \e len characters - from \e chars are in the encoding. - - A good model for this function is the - TQWindowsLocalCodec::heuristicContentMatch function found in the TQt - sources. - - A TQTextCodec subclass might have improved performance if you also - re-implement: - - \code - bool canEncode( TQChar ) const - \endcode - Test if a Unicode character can be encoded. - - \code - bool canEncode( const TQString& ) const - \endcode - Test if a string of Unicode characters can be encoded. - - \code - int heuristicNameMatch(const char* hint) const - \endcode - Test if a possibly non-standard name is referring to the codec. - - Codecs can also be created as \link plugins-howto.html plugins\endlink. -*/ - - -/*! - \nonreentrant - - Constructs a TQTextCodec, and gives it the highest precedence. The - TQTextCodec should always be constructed on the heap (i.e. with \c - new). TQt takes ownership and will delete it when the application - terminates. -*/ -TQTextCodec::TQTextCodec() -{ - setup(); - all->insert( all->begin(), this ); -} - - -/*! - \nonreentrant - - Destroys the TQTextCodec. Note that you should not delete codecs - yourself: once created they become TQt's responsibility. -*/ -TQTextCodec::~TQTextCodec() -{ - if ( !destroying_is_ok ) - qWarning("TQTextCodec::~TQTextCodec() called by application"); - if ( all ) - all->remove( this ); -} - - -/*! 
- Returns a value indicating how likely it is that this decoder is - appropriate for decoding some format that has the given name. The - name is compared with the \a hint. - - A good match returns a positive number around the length of the - string. A bad match is negative. - - The default implementation calls simpleHeuristicNameMatch() with - the name of the codec. -*/ -int TQTextCodec::heuristicNameMatch(const char* hint) const -{ - return simpleHeuristicNameMatch(name(),hint); -} - - -// returns a string containing the letters and numbers from input, -// with a space separating run of a character class. e.g. "iso8859-1" -// becomes "iso 8859 1" -static TQString lettersAndNumbers( const char * input ) -{ - TQString result; - TQChar c; - - while( input && *input ) { - c = *input; - if ( c.isLetter() || c.isNumber() ) - result += c.lower(); - if ( input[1] ) { - // add space at character class transition, except - // transition from upper-case to lower-case letter - TQChar n( input[1] ); - if ( c.isLetter() && n.isLetter() ) { - if ( c == c.lower() && n == n.upper() ) - result += ' '; - } else if ( c.category() != n.category() ) { - result += ' '; - } - } - input++; - } - return result.simplifyWhiteSpace(); -} - -/*! - A simple utility function for heuristicNameMatch(): it does some - very minor character-skipping so that almost-exact matches score - high. \a name is the text we're matching and \a hint is used for - the comparison. -*/ -int TQTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint) -{ - // if they're the same, return a perfect score. - if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 ) - return tqstrlen( hint ); - - // if the letters and numbers are the same, we have an "almost" - // perfect match. 
- TQString h( lettersAndNumbers( hint ) ); - TQString n( lettersAndNumbers( name ) ); - if ( h == n ) - return tqstrlen( hint )-1; - - if ( h.stripWhiteSpace() == n.stripWhiteSpace() ) - return tqstrlen( hint )-2; - - // could do some more here, but I don't think it's worth it - - return 0; -} - - -/*! - Returns the TQTextCodec \a i positions from the most recently - inserted codec, or 0 if there is no such TQTextCodec. Thus, - codecForIndex(0) returns the most recently created TQTextCodec. -*/ -TQTextCodec* TQTextCodec::codecForIndex(int i) -{ - setup(); - return (uint)i >= all->count() ? 0 : *all->at(i); -} - - -/*! - Returns the TQTextCodec which matches the \link - TQTextCodec::mibEnum() MIBenum\endlink \a mib. -*/ -TQTextCodec* TQTextCodec::codecForMib(int mib) -{ - setup(); - TQValueList<TQTextCodec*>::ConstIterator i; - TQTextCodec* result=0; - for ( i = all->begin(); i != all->end(); ++i ) { - result = *i; - if ( result->mibEnum()==mib ) - return result; - } - -#if !defined(TQT_NO_COMPONENT) && !defined(TQT_LITE_COMPONENT) - if ( !result || (result && result->mibEnum() != mib) ) { - TQTextCodec *codec = TQTextCodecFactory::createForMib(mib); - if (codec) - result = codec; - } -#endif // !TQT_NO_COMPONENT !TQT_LITE_COMPONENT - - return result; -} - - - - - -#ifdef TQ_OS_WIN32 -class TQWindowsLocalCodec: public TQTextCodec -{ -public: - TQWindowsLocalCodec(); - ~TQWindowsLocalCodec(); - - TQString toUnicode(const char* chars, int len) const; - TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; - - const char* name() const; - int mibEnum() const; - - int heuristicContentMatch(const char* chars, int len) const; - - TQTextDecoder* makeDecoder() const; -}; - -TQWindowsLocalCodec::TQWindowsLocalCodec() -{ -} - -TQWindowsLocalCodec::~TQWindowsLocalCodec() -{ -} - - -TQString TQWindowsLocalCodec::toUnicode(const char* chars, int len) const -{ - if ( len == 1 && chars ) { // Optimization; avoids allocation - char c[2]; - c[0] = *chars; - c[1] = 0; - 
return qt_winMB2TQString( c, 2 ); - } - if ( len < 0 ) - return qt_winMB2TQString( chars ); - TQCString s(chars,len+1); - return qt_winMB2TQString(s); -} - -TQCString TQWindowsLocalCodec::fromUnicode(const TQString& uc, int& lenInOut ) const -{ - TQCString r = qt_winTQString2MB( uc, lenInOut ); - lenInOut = r.length(); - return r; -} - - -const char* TQWindowsLocalCodec::name() const -{ - return "System"; -} - -int TQWindowsLocalCodec::mibEnum() const -{ - return 0; -} - - -int TQWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const -{ - // ### Not a bad default implementation? - TQString t = toUnicode(chars,len); - int l = t.length(); - TQCString mb = fromUnicode(t,l); - int i=0; - while ( i < len ) { - if ( chars[i] == mb[i] ) - i++; - else - break; - } - return i; -} - -class TQWindowsLocalDecoder: public TQTextDecoder -{ - const TQWindowsLocalCodec* codec; - int nbuf; - uchar buf[4]; // hopefully this will be enough -public: - TQWindowsLocalDecoder(const TQWindowsLocalCodec *c) : codec(c), nbuf(0) - { - } - - TQString toUnicode(const char* chars, int len) - { - if (len != 1 && nbuf == 0) - return codec->toUnicode(chars, len); - if (len == 1) { - char c[sizeof buf + 2]; - memcpy(c, buf, nbuf); - c[nbuf] = *chars; - c[nbuf+1] = 0; - - // try to decode this: - TQString retval = codec->toUnicode(c, -1); - if ( retval.isEmpty() ) { - // it didn't return anything; we probably stopped mid-way in a multi-byte - // character - buf[nbuf++] = *chars; - if (nbuf + 1 == sizeof buf) { - qWarning("TQWindowsLocalDecoder: exceeded max internal buffer size"); - nbuf = 0; - } - } - else - nbuf = 0; // decoded successfully - - return retval; - } - - if (len == -1) - len = (int)strlen(chars); - - // Ugh! 
We need to allocate memory - char *s = new char[nbuf + len + 1]; - memcpy(s, buf, nbuf); - memcpy(s + nbuf, chars, len); - s[nbuf + len] = 0; - - TQString retval = codec->toUnicode(s, -1); - nbuf = 0; - delete[] s; - return retval; - } -}; - -TQTextDecoder* TQWindowsLocalCodec::makeDecoder() const -{ - return new TQWindowsLocalDecoder(this); -} - -#else - -/* locale names mostly copied from XFree86 */ -static const char * const iso8859_2locales[] = { - "croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ", "czech", "hr", - "hr_HR", "hu", "hu_HU", "hungarian", "pl", "pl_PL", "polish", "ro", - "ro_RO", "rumanian", "serbocroatian", "sh", "sh_SP", "sh_YU", "sk", - "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP", 0 }; - -static const char * const iso8859_3locales[] = { - "eo", 0 }; - -static const char * const iso8859_4locales[] = { - "ee", "ee_EE", 0 }; - -static const char * const iso8859_5locales[] = { - "mk", "mk_MK", "sp", "sp_YU", 0 }; - -static const char * const cp_1251locales[] = { - "be", "be_BY", "bg", "bg_BG", "bulgarian", 0 }; - -static const char * const pt_154locales[] = { - "ba_RU", "ky", "ky_KG", "kk", "kk_KZ", 0 }; - -static const char * const iso8859_6locales[] = { - "ar_AA", "ar_SA", "arabic", 0 }; - -static const char * const iso8859_7locales[] = { - "el", "el_GR", "greek", 0 }; - -static const char * const iso8859_8locales[] = { - "hebrew", "he", "he_IL", "iw", "iw_IL", 0 }; - -static const char * const iso8859_9locales[] = { - "tr", "tr_TR", "turkish", 0 }; - -static const char * const iso8859_13locales[] = { - "lt", "lt_LT", "lv", "lv_LV", 0 }; - -static const char * const iso8859_15locales[] = { - "et", "et_EE", - // Euro countries - "br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE", "de_LU", "en_IE", - "es", "es_ES", "eu_ES", "fi", "fi_FI", "finnish", "fr", "fr_FR", - "fr_BE", "fr_LU", "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR", - "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE", - 0 }; - -static const char * const 
koi8_ulocales[] = { - "uk", "uk_UA", "ru_UA", "ukrainian", 0 }; - -static const char * const tis_620locales[] = { - "th", "th_TH", "thai", 0 }; - -static const char * const tcvnlocales[] = { - "vi", "vi_VN", 0 }; - -static bool try_locale_list( const char * const locale[], const char * lang ) -{ - int i; - for( i=0; locale[i] && *locale[i] && strcmp(locale[i], lang); i++ ) - ; - return locale[i] != 0; -} - -// For the probably_koi8_locales we have to look. the standard says -// these are 8859-5, but almost all Russian users use KOI8-R and -// incorrectly set $LANG to ru_RU. We'll check tolower() to see what -// tolower() thinks ru_RU means. - -// If you read the history, it seems that many Russians blame ISO and -// Perestroika for the confusion. -// -// The real bug is that some programs break if the user specifies -// ru_RU.KOI8-R. - -static const char * const probably_koi8_rlocales[] = { - "ru", "ru_SU", "ru_RU", "russian", 0 }; - -static TQTextCodec * ru_RU_hack( const char * i ) { - TQTextCodec * ru_RU_codec = 0; - - TQCString origlocale = setlocale( LC_CTYPE, i ); - // tqunicode koi8r latin5 name - // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU - // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU - int latin5 = tolower( 0xCE ); - int koi8r = tolower( 0xE0 ); - if ( koi8r == 0xC0 && latin5 != 0xEE ) { - ru_RU_codec = TQTextCodec::codecForName( "KOI8-R" ); - } else if ( koi8r != 0xC0 && latin5 == 0xEE ) { - ru_RU_codec = TQTextCodec::codecForName( "ISO 8859-5" ); - } else { - // something else again... let's assume... *throws dice* - ru_RU_codec = TQTextCodec::codecForName( "KOI8-R" ); - qWarning( "TQTextCodec: using KOI8-R, probe failed (%02x %02x %s)", - koi8r, latin5, i ); - } - setlocale( LC_CTYPE, origlocale.data() ); - - return ru_RU_codec; -} - -#endif - -/*! - Set the codec to \a c; this will be returned by codecForLocale(). - This might be needed for some applications that want to use their - own mechanism for setting the locale. 
- - \sa codecForLocale() -*/ -void TQTextCodec::setCodecForLocale(TQTextCodec *c) { - localeMapper = c; -} - -/*! Returns a pointer to the codec most suitable for this locale. */ - -TQTextCodec* TQTextCodec::codecForLocale() -{ - if ( localeMapper ) - return localeMapper; - - setup(); - - return localeMapper; -} - - -/*! - Searches all installed TQTextCodec objects and returns the one - which best matches \a name; the match is case-insensitive. Returns - 0 if no codec's heuristicNameMatch() reports a match better than - \a accuracy, or if \a name is a null string. - - \sa heuristicNameMatch() -*/ - -TQTextCodec* TQTextCodec::codecForName( const char* name, int accuracy ) -{ - if ( !name || !*name ) - return 0; - - setup(); - TQValueList<TQTextCodec*>::ConstIterator i; - TQTextCodec* result = 0; - int best = accuracy; - TQTextCodec* cursor; - for ( i = all->begin(); i != all->end(); ++i ) { - cursor = *i; - int s = cursor->heuristicNameMatch( name ); - if ( s > best ) { - best = s; - result = cursor; - } - } - -#if !defined(TQT_NO_COMPONENT) && !defined(TQT_LITE_COMPONENT) - if ( !result ) - result = TQTextCodecFactory::createForName(name); -#endif // !TQT_NO_COMPONENT !TQT_LITE_COMPONENT - - return result; -} - - -/*! - Searches all installed TQTextCodec objects, returning the one which - most recognizes the given content. May return 0. - - Note that this is often a poor choice, since character encodings - often use most of the available character sequences, and so only - by linguistic analysis could a true match be made. - - \a chars contains the string to check, and \a len contains the - number of characters in the string to use. 
- - \sa heuristicContentMatch() -*/ -TQTextCodec* TQTextCodec::codecForContent(const char* chars, int len) -{ - setup(); - TQValueList<TQTextCodec*>::ConstIterator i; - TQTextCodec* result = 0; - int best=0; - TQTextCodec* cursor; - for ( i = all->begin(); i != all->end(); ++i ) { - cursor = *i; - int s = cursor->heuristicContentMatch(chars,len); - if ( s > best ) { - best = s; - result = cursor; - } - } - return result; -} - - -/*! - \fn const char* TQTextCodec::name() const - - TQTextCodec subclasses must reimplement this function. It returns - the name of the encoding supported by the subclass. When choosing - a name for an encoding, consider these points: - \list - \i On X11, heuristicNameMatch( const char * hint ) - is used to test if a the TQTextCodec - can convert between Unicode and the encoding of a font - with encoding \e hint, such as "iso8859-1" for Latin-1 fonts, - "koi8-r" for Russian KOI8 fonts. - The default algorithm of heuristicNameMatch() uses name(). - \i Some applications may use this function to present - encodings to the end user. - \endlist - */ - -/*! - \fn int TQTextCodec::mibEnum() const - - Subclasses of TQTextCodec must reimplement this function. It - returns the MIBenum (see \link - http://www.iana.org/assignments/character-sets the - IANA character-sets encoding file\endlink for more information). - It is important that each TQTextCodec subclass returns the correct - unique value for this function. -*/ - - -/*! - Returns the preferred mime name of the encoding as defined in the - \link http://www.iana.org/assignments/character-sets - IANA character-sets encoding file\endlink. -*/ -const char* TQTextCodec::mimeName() const -{ - return name(); -} - - -/*! - \fn int TQTextCodec::heuristicContentMatch(const char* chars, int len) const - - TQTextCodec subclasses must reimplement this function. 
It examines - the first \a len bytes of \a chars and returns a value indicating - how likely it is that the string is a prefix of text encoded in - the encoding of the subclass. A negative return value indicates - that the text is detectably not in the encoding (e.g. it contains - characters undefined in the encoding). A return value of 0 - indicates that the text should be decoded with this codec rather - than as ASCII, but there is no particular evidence. The value - should range up to \a len. Thus, most decoders will return -1, 0, - or -\a len. - - The characters are not null terminated. - - \sa codecForContent(). -*/ - - -/*! - Creates a TQTextDecoder which stores enough state to decode chunks - of char* data to create chunks of Unicode data. The default - implementation creates a stateless decoder, which is only - sufficient for the simplest encodings where each byte corresponds - to exactly one Unicode character. - - The caller is responsible for deleting the returned object. -*/ -TQTextDecoder* TQTextCodec::makeDecoder() const -{ - return new TQTextStatelessDecoder(this); -} - - -/*! - Creates a TQTextEncoder which stores enough state to encode chunks - of Unicode data as char* data. The default implementation creates - a stateless encoder, which is only sufficient for the simplest - encodings where each Unicode character corresponds to exactly one - character. - - The caller is responsible for deleting the returned object. -*/ -TQTextEncoder* TQTextCodec::makeEncoder() const -{ - return new TQTextStatelessEncoder(this); -} - - -/*! - TQTextCodec subclasses must reimplement this function or - makeDecoder(). It converts the first \a len characters of \a chars - to Unicode. - - The default implementation makes a decoder with makeDecoder() and - converts the input with that. 
Note that the default makeDecoder() - implementation makes a decoder that simply calls - this function, hence subclasses \e must reimplement one function or - the other to avoid infinite recursion. -*/ -TQString TQTextCodec::toUnicode(const char* chars, int len) const -{ - if ( chars == 0 ) - return TQString::null; - TQTextDecoder* i = makeDecoder(); - TQString result = i->toUnicode(chars,len); - delete i; - return result; -} - - -/*! - TQTextCodec subclasses must reimplement either this function or - makeEncoder(). It converts the first \a lenInOut characters of \a - uc from Unicode to the encoding of the subclass. If \a lenInOut is - negative or too large, the length of \a uc is used instead. - - Converts \a lenInOut characters (not bytes) from \a uc, producing - a TQCString. \a lenInOut will be set to the \link - TQCString::length() length\endlink of the result (in bytes). - - The default implementation makes an encoder with makeEncoder() and - converts the input with that. Note that the default makeEncoder() - implementation makes an encoder that simply calls this function, - hence subclasses \e must reimplement one function or the other to - avoid infinite recursion. -*/ - -TQCString TQTextCodec::fromUnicode(const TQString& uc, int& lenInOut) const -{ - TQTextEncoder* i = makeEncoder(); - TQCString result = i->fromUnicode(uc, lenInOut); - delete i; - return result; -} - -/*! - \overload - \internal -*/ -TQByteArray TQTextCodec::fromUnicode( const TQString &str, int pos, int len ) const -{ - TQByteArray a; - if( len < 0 ) - len = str.length() - pos; - a = fromUnicode( str.mid(pos, len) ); - - if( a.size() > 0 && a[(int)a.size() - 1] == '\0' ) - a.resize( a.size() - 1 ); - return a; -} - -/*! - \overload - - \a uc is the tqunicode source string. -*/ -TQCString TQTextCodec::fromUnicode(const TQString& uc) const -{ - int l = uc.length(); - return fromUnicode(uc,l); -} - -/*! 
- \overload - - \a a contains the source characters; \a len contains the number of - characters in \a a to use. -*/ -TQString TQTextCodec::toUnicode(const TQByteArray& a, int len) const -{ - int l = a.size(); - l = TQMIN( l, len ); - return toUnicode( a.data(), l ); -} - -/*! - \overload - - \a a contains the source characters. -*/ -TQString TQTextCodec::toUnicode(const TQByteArray& a) const -{ - int l = a.size(); - return toUnicode( a.data(), l ); -} - -/*! - \overload - - \a a contains the source characters; \a len contains the number of - characters in \a a to use. -*/ -TQString TQTextCodec::toUnicode(const TQCString& a, int len) const -{ - int l = a.length(); - l = TQMIN( l, len ); - return toUnicode( a.data(), l ); -} - -/*! - \overload - - \a a contains the source characters. -*/ -TQString TQTextCodec::toUnicode(const TQCString& a) const -{ - int l = a.length(); - return toUnicode( a.data(), l ); -} - -/*! - \overload - - \a chars contains the source characters. -*/ -TQString TQTextCodec::toUnicode(const char* chars) const -{ - return toUnicode(chars,tqstrlen(chars)); -} - -/*! - \internal -*/ -unsigned short TQTextCodec::characterFromUnicode(const TQString &str, int pos) const -{ - TQCString result = TQTextCodec::fromUnicode(TQString(str[pos])); - uchar *ch = (uchar *) result.data(); - ushort retval = 0; - if (result.size() > 2) { - retval = (ushort) *ch << 8; - ch++; - } - return retval + *ch; -} - -/*! - Returns TRUE if the Unicode character \a ch can be fully encoded - with this codec; otherwise returns FALSE. The default - implementation tests if the result of toUnicode(fromUnicode(ch)) - is the original \a ch. Subclasses may be able to improve the - efficiency. -*/ -bool TQTextCodec::canEncode( TQChar ch ) const -{ - return toUnicode(fromUnicode(ch)) == ch; -} - -/*! - \overload - - \a s contains the string being tested for encode-ability. 
-*/ -bool TQTextCodec::canEncode( const TQString& s ) const -{ - if ( s.isEmpty() ) - return TRUE; - return toUnicode(fromUnicode(s)) == s; -} - - - -/*! - \class TQTextEncoder tqtextcodec.h - \brief The TQTextEncoder class provides a state-based encoder. - \reentrant - \ingroup i18n - - The encoder converts Unicode into another format, remembering any - state that is required between calls. - - \sa TQTextCodec::makeEncoder() -*/ - -/*! - Destroys the encoder. -*/ -TQTextEncoder::~TQTextEncoder() -{ -} - -/*! - \fn TQCString TQTextEncoder::fromUnicode(const TQString& uc, int& lenInOut) - - Converts \a lenInOut characters (not bytes) from \a uc, producing - a TQCString. \a lenInOut will be set to the \link - TQCString::length() length\endlink of the result (in bytes). - - The encoder is free to record state to use when subsequent calls - are made to this function (for example, it might change modes with - escape sequences if needed during the encoding of one string, then - assume that mode applies when a subsequent call begins). -*/ - -/*! - \class TQTextDecoder tqtextcodec.h - \brief The TQTextDecoder class provides a state-based decoder. - \reentrant - \ingroup i18n - - The decoder converts a text format into Unicode, remembering any - state that is required between calls. - - \sa TQTextCodec::makeEncoder() -*/ - - -/*! - Destroys the decoder. -*/ -TQTextDecoder::~TQTextDecoder() -{ -} - -/*! - \fn TQString TQTextDecoder::toUnicode(const char* chars, int len) - - Converts the first \a len bytes in \a chars to Unicode, returning - the result. - - If not all characters are used (e.g. if only part of a multi-byte - encoding is at the end of the characters), the decoder remembers - enough state to continue with the next call to this function. -*/ - -#define CHAINED 0xffff - -struct TQMultiByteUnicodeTable { - // If multiByte, ignore tqunicode and index into multiByte - // with the next character. 
- TQMultiByteUnicodeTable() : tqunicode(0xfffd), multiByte(0) { } - - ~TQMultiByteUnicodeTable() - { - if ( multiByte ) - delete [] multiByte; - } - - ushort tqunicode; - TQMultiByteUnicodeTable* multiByte; -}; - -static int getByte(char* &cursor) -{ - int byte = 0; - if ( *cursor ) { - if ( cursor[1] == 'x' ) - byte = strtol(cursor+2,&cursor,16); - else if ( cursor[1] == 'd' ) - byte = strtol(cursor+2,&cursor,10); - else - byte = strtol(cursor+2,&cursor,8); - } - return byte&0xff; -} - -class TQTextCodecFromIOD; - -class TQTextCodecFromIODDecoder : public TQTextDecoder { - const TQTextCodecFromIOD* codec; - TQMultiByteUnicodeTable* mb; -public: - TQTextCodecFromIODDecoder(const TQTextCodecFromIOD* c); - TQString toUnicode(const char* chars, int len); -}; - -class TQTextCodecFromIOD : public TQTextCodec { - friend class TQTextCodecFromIODDecoder; - - TQCString n; - - // If from_tqunicode_page[row()][cell()] is 0 and from_tqunicode_page_multiByte, - // use from_tqunicode_page_multiByte[row()][cell()] as string. 
- char** from_tqunicode_page; - char*** from_tqunicode_page_multiByte; - char unkn; - - // Only one of these is used - ushort* to_tqunicode; - TQMultiByteUnicodeTable* to_tqunicode_multiByte; - int max_bytes_per_char; - TQStrList aliases; - - bool stateless() const { return !to_tqunicode_multiByte; } - -public: - TQTextCodecFromIOD(TQIODevice* iod) - { - from_tqunicode_page = 0; - to_tqunicode_multiByte = 0; - to_tqunicode = 0; - from_tqunicode_page_multiByte = 0; - max_bytes_per_char = 1; - - const int maxlen=100; - char line[maxlen]; - char esc='\\'; - char comm='%'; - bool incmap = FALSE; - while (iod->readLine(line,maxlen) > 0) { - if (0==qstrnicmp(line,"<code_set_name>",15)) - n = line+15; - else if (0==qstrnicmp(line,"<escape_char> ",14)) - esc = line[14]; - else if (0==qstrnicmp(line,"<comment_char> ",15)) - comm = line[15]; - else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) { - aliases.append(line+8); - } else if (0==qstrnicmp(line,"CHARMAP",7)) { - if (!from_tqunicode_page) { - from_tqunicode_page = new char*[256]; - for (int i=0; i<256; i++) - from_tqunicode_page[i]=0; - } - if (!to_tqunicode) { - to_tqunicode = new ushort[256]; - } - incmap = TRUE; - } else if (0==qstrnicmp(line,"END CHARMAP",11)) - break; - else if (incmap) { - char* cursor = line; - int byte=-1,tqunicode=-1; - ushort* mb_tqunicode=0; - const int maxmb=8; // more -> we'll need to improve datastructures - char mb[maxmb+1]; - int nmb=0; - - while (*cursor) { - if (cursor[0]=='<' && cursor[1]=='U' && - cursor[2]>='0' && cursor[2]<='9' && - cursor[3]>='0' && cursor[3]<='9') { - - tqunicode = strtol(cursor+2,&cursor,16); - - } else if (*cursor==esc) { - - byte = getByte(cursor); - - if ( *cursor == esc ) { - if ( !to_tqunicode_multiByte ) { - to_tqunicode_multiByte = - new TQMultiByteUnicodeTable[256]; - for (int i=0; i<256; i++) { - to_tqunicode_multiByte[i].tqunicode = - to_tqunicode[i]; - to_tqunicode_multiByte[i].multiByte = 0; - } - delete [] to_tqunicode; - to_tqunicode = 0; 
- } - TQMultiByteUnicodeTable* mbut = - to_tqunicode_multiByte+byte; - mb[nmb++] = byte; - while ( nmb < maxmb && *cursor == esc ) { - // Always at least once - - mbut->tqunicode = CHAINED; - byte = getByte(cursor); - mb[nmb++] = byte; - if (!mbut->multiByte) { - mbut->multiByte = - new TQMultiByteUnicodeTable[256]; - } - mbut = mbut->multiByte+byte; - mb_tqunicode = & mbut->tqunicode; - } - - if ( nmb > max_bytes_per_char ) - max_bytes_per_char = nmb; - } - } else { - cursor++; - } - } - - if (tqunicode >= 0 && tqunicode <= 0xffff) - { - TQChar ch((ushort)tqunicode); - if (!from_tqunicode_page[ch.row()]) { - from_tqunicode_page[ch.row()] = new char[256]; - for (int i=0; i<256; i++) - from_tqunicode_page[ch.row()][i]=0; - } - if ( mb_tqunicode ) { - from_tqunicode_page[ch.row()][ch.cell()] = 0; - if (!from_tqunicode_page_multiByte) { - from_tqunicode_page_multiByte = new char**[256]; - for (int i=0; i<256; i++) - from_tqunicode_page_multiByte[i]=0; - } - if (!from_tqunicode_page_multiByte[ch.row()]) { - from_tqunicode_page_multiByte[ch.row()] = new char*[256]; - for (int i=0; i<256; i++) - from_tqunicode_page_multiByte[ch.row()][i] = 0; - } - mb[nmb++] = 0; - from_tqunicode_page_multiByte[ch.row()][ch.cell()] - = qstrdup(mb); - *mb_tqunicode = tqunicode; - } else { - from_tqunicode_page[ch.row()][ch.cell()] = (char)byte; - if ( to_tqunicode ) - to_tqunicode[byte] = tqunicode; - else - to_tqunicode_multiByte[byte].tqunicode = tqunicode; - } - } else { - } - } - } - n = n.stripWhiteSpace(); - - unkn = '?'; // ##### Might be a bad choice. 
- } - - ~TQTextCodecFromIOD() - { - if ( from_tqunicode_page ) { - for (int i=0; i<256; i++) - if (from_tqunicode_page[i]) - delete [] from_tqunicode_page[i]; - } - if ( from_tqunicode_page_multiByte ) { - for (int i=0; i<256; i++) - if (from_tqunicode_page_multiByte[i]) - for (int j=0; j<256; j++) - if (from_tqunicode_page_multiByte[i][j]) - delete [] from_tqunicode_page_multiByte[i][j]; - } - if ( to_tqunicode ) - delete [] to_tqunicode; - if ( to_tqunicode_multiByte ) - delete [] to_tqunicode_multiByte; - } - - bool ok() const - { - return !!from_tqunicode_page; - } - - TQTextDecoder* makeDecoder() const - { - if ( stateless() ) - return TQTextCodec::makeDecoder(); - else - return new TQTextCodecFromIODDecoder(this); - } - - const char* name() const - { - return n; - } - - int mibEnum() const - { - return 0; // #### Unknown. - } - - int heuristicContentMatch(const char*, int) const - { - return 0; - } - - int heuristicNameMatch(const char* hint) const - { - int bestr = TQTextCodec::heuristicNameMatch(hint); - TQStrListIterator it(aliases); - char* a; - while ((a=it.current())) { - ++it; - int r = simpleHeuristicNameMatch(a,hint); - if (r > bestr) - bestr = r; - } - return bestr; - } - - TQString toUnicode(const char* chars, int len) const - { - const uchar* uchars = (const uchar*)chars; - TQString result; - TQMultiByteUnicodeTable* multiByte=to_tqunicode_multiByte; - if ( multiByte ) { - while (len--) { - TQMultiByteUnicodeTable& mb = multiByte[*uchars]; - if ( mb.multiByte ) { - // Chained multi-byte - multiByte = mb.multiByte; - } else { - result += TQChar(mb.tqunicode); - multiByte=to_tqunicode_multiByte; - } - uchars++; - } - } else { - while (len--) - result += TQChar(to_tqunicode[*uchars++]); - } - return result; - } - -#if !defined(TQ_NO_USING_KEYWORD) - using TQTextCodec::fromUnicode; -#endif - TQCString fromUnicode(const TQString& uc, int& lenInOut) const - { - if (lenInOut > (int)uc.length()) - lenInOut = uc.length(); - int rlen = 
lenInOut*max_bytes_per_char; - TQCString rstr(rlen+1); - char* cursor = rstr.data(); - char* s=0; - int l = lenInOut; - int lout = 0; - for (int i=0; i<l; i++) { - TQChar ch = uc[i]; - if ( ch == TQChar::null ) { - // special - *cursor++ = 0; - } else if ( from_tqunicode_page[ch.row()] && - from_tqunicode_page[ch.row()][ch.cell()] ) - { - *cursor++ = from_tqunicode_page[ch.row()][ch.cell()]; - lout++; - } else if ( from_tqunicode_page_multiByte && - from_tqunicode_page_multiByte[ch.row()] && - (s=from_tqunicode_page_multiByte[ch.row()][ch.cell()]) ) - { - while (*s) { - *cursor++ = *s++; - lout++; - } - } else { - *cursor++ = unkn; - lout++; - } - } - *cursor = 0; - lenInOut = lout; - return rstr; - } -}; - -TQTextCodecFromIODDecoder::TQTextCodecFromIODDecoder(const TQTextCodecFromIOD* c) : - codec(c) -{ - mb = codec->to_tqunicode_multiByte; -} - -TQString TQTextCodecFromIODDecoder::toUnicode(const char* chars, int len) -{ - const uchar* uchars = (const uchar*)chars; - TQString result; - while (len--) { - TQMultiByteUnicodeTable& t = mb[*uchars]; - if ( t.multiByte ) { - // Chained multi-byte - mb = t.multiByte; - } else { - if ( t.tqunicode ) - result += TQChar(t.tqunicode); - mb=codec->to_tqunicode_multiByte; - } - uchars++; - } - return result; -} - -#ifndef TQT_NO_CODECS -// Cannot use <pre> or \code -/*! - Reads a POSIX2 charmap definition from \a iod. - The parser recognizes the following lines: - -<font name="sans"> - <code_set_name> <i>name</i></br> - <escape_char> <i>character</i></br> - % alias <i>alias</i></br> - CHARMAP</br> - <<i>token</i>> /x<i>hexbyte</i> <U<i>tqunicode</i>> ...</br> - <<i>token</i>> /d<i>decbyte</i> <U<i>tqunicode</i>> ...</br> - <<i>token</i>> /<i>octbyte</i> <U<i>tqunicode</i>> ...</br> - <<i>token</i>> /<i>any</i>/<i>any</i>... <U<i>tqunicode</i>> ...</br> - END CHARMAP</br> -</font> - - The resulting TQTextCodec is returned (and also added to the global - list of codecs). 
The name() of the result is taken from the - code_set_name. - - Note that a codec constructed in this way uses much more memory - and is slower than a hand-written TQTextCodec subclass, since - tables in code are kept in memory shared by all TQt applications. - - \sa loadCharmapFile() -*/ -TQTextCodec* TQTextCodec::loadCharmap(TQIODevice* iod) -{ - TQTextCodecFromIOD* r = new TQTextCodecFromIOD(iod); - if ( !r->ok() ) { - delete r; - r = 0; - } - return r; -} - -/*! - A convenience function for loadCharmap() that loads the charmap - definition from the file \a filename. -*/ -TQTextCodec* TQTextCodec::loadCharmapFile(TQString filename) -{ - TQFile f(filename); - if (f.open(IO_ReadOnly)) { - TQTextCodecFromIOD* r = new TQTextCodecFromIOD(TQT_TQIODEVICE(&f)); - if ( !r->ok() ) - delete r; - else - return r; - } - return 0; -} - -#endif //TQT_NO_CODECS - -/*! - Returns a string representing the current language and - sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil. -*/ - -const char* TQTextCodec::locale() -{ - return TQLocalePrivate::systemLocaleName(); -} - -#ifndef TQT_NO_CODECS - -class TQSimpleTextCodec: public TQTextCodec -{ -public: - TQSimpleTextCodec( int ); - ~TQSimpleTextCodec(); - - TQString toUnicode(const char* chars, int len) const; -#if !defined(TQ_NO_USING_KEYWORD) - using TQTextCodec::fromUnicode; -#endif - TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; - unsigned short characterFromUnicode(const TQString &str, int pos) const; - - const char* name() const; - const char* mimeName() const; - int mibEnum() const; - - int heuristicContentMatch(const char* chars, int len) const; - - int heuristicNameMatch(const char* hint) const; -#if !defined(TQ_NO_USING_KEYWORD) - using TQTextCodec::canEncode; -#endif - bool canEncode( TQChar ch ) const; - - void fromUnicode( const TQChar *in, unsigned short *out, int length ) const; - -private: - void buildReverseMap(); - - int forwardIndex; -#ifndef TQ_WS_TQWS - 
TQMemArray<unsigned char> *reverseMap; -#endif -}; - -#ifdef TQ_WS_TQWS -static const TQSimpleTextCodec * reverseOwner = 0; -static TQMemArray<unsigned char> * reverseMap = 0; -#endif - -#define LAST_MIB 2004 - -static const struct { - const char *mime; - const char * cs; - int mib; - TQ_UINT16 values[128]; -} tqunicodevalues[] = { - // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt - { "KOI8-R", "KOI8-R", 2084, - { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, - 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, - 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248, - 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, - 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, - 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E, - 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, - 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9, - 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, - 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, - 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, - 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, - 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, - 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, - 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, - 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, - // /**/ - The BULLET OPERATOR is confused. Some people think - // it should be 0x2022 (BULLET). 
- - // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt - { "KOI8-U", "KOI8-U", 2088, - { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, - 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, - 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, - 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, - 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, - 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E, - 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, - 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9, - 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, - 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, - 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, - 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, - 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, - 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, - 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, - 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, - - // next bits generated from tables on the Unicode 2.0 CD. we can - // use these tables since this is part of the transition to using - // tqunicode everywhere in qt. - - // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done - - // then I inserted the files manually. 
- { "ISO-8859-2", "ISO 8859-2", 5, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, - 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B, - 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, - 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C, - 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, - 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, - 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, - 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, - 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, - 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, - 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, - 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} }, - { "ISO-8859-3", "ISO 8859-3", 6, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7, - 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B, - 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7, - 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C, - 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7, - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, - 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7, - 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF, - 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7, - 
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, - 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, - 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} }, - { "ISO-8859-4", "ISO 8859-4", 7, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, - 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF, - 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7, - 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B, - 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, - 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A, - 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7, - 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF, - 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, - 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B, - 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, - 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} }, - { "ISO-8859-5", "ISO 8859-5", 8, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, - 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F, - 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, - 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, - 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, - 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, - 
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, - 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, - 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, - 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, - 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, - 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} }, - { "ISO-8859-6", "ISO 8859-6", 82, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F, - 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, - 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, - 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, - 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, - 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F, - 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, - { "ISO-8859-7", "ISO 8859-7", 10, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, - 
0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, - 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, - 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, - 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, - 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, - 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, - 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, - 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, - 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} }, - { "ISO-8859-8-I", "ISO 8859-8-I", 85, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017, - 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, - 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, - 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, - 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, - { "ISO-8859-9", "ISO 8859-9", 12, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 
0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, - 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, - 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, - 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, - 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, - 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, - 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} }, - { "ISO-8859-10", "ISO 8859-10", 13, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, - 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A, - 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7, - 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B, - 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, - 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF, - 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168, - 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, - 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, - 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF, - 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, - 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} }, - { "ISO-8859-13", "ISO 8859-13", 109, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 
- 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, - 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, - 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, - 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, - 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, - 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, - 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, - 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, - 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, - 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, - 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} }, - { "ISO-8859-14", "ISO 8859-14", 110, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, - 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, - 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, - 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, - 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, - 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, - 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, - 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, - 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 
0x00F5, 0x00F6, 0x1E6B, - 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} }, - { "ISO-8859-16", "ISO 8859-16", 112, - { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, - 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, - 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B, - 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, - 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C, - 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, - 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, - 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF, - 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, - 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, - 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF} }, - - // next bits generated again from tables on the Unicode 3.0 CD. 
- - // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done - - { "CP 850", "IBM 850", 2009, - { 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, - 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, - 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, - 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192, - 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, - 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, - 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0, - 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510, - 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3, - 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4, - 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE, - 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580, - 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE, - 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4, - 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8, - 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0} }, - { "CP 874", "CP 874", 0, //### what is the mib? 
- { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, - 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, - 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, - 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, - 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, - 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, - 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, - 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, - 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, - 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, - 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, - 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, - { "IBM 866", "IBM 866", 2086, - { 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, - 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, - 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, - 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, - 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, - 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, - 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, - 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, - 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, - 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, - 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, - 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, - 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, - 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 
0x044D, 0x044E, 0x044F, - 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E, - 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0} }, - - { "windows-1250", "CP 1250", 2250, - { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, - 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179, - 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A, - 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B, - 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C, - 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, - 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, - 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, - 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, - 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, - 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, - 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, - 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} }, - { "windows-1251", "CP 1251", 2251, - { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, - 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F, - 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, - 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, - 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407, - 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, - 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457, - 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, - 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, - 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 
0x0425, 0x0426, 0x0427, - 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, - 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, - 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, - 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, - 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} }, - { "windows-1252", "CP 1252", 2252, - { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, - 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, - 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, - 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, - 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, - 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, - 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, - 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, - 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, - { "windows-1253", "CP 1253", 2253, - { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, - 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7, - 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 
0x00BD, 0x038E, 0x038F, - 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, - 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, - 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, - 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, - 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, - 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, - 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, - 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} }, - { "windows-1254", "CP 1254", 2254, - { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, - 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, - 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, - 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, - 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, - 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, - 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, - 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} }, - { "windows-1255", "CP 1255", 2255, - { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, - 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 
0x00A5, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, - 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, - 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF, - 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3, - 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, - 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, - 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, - 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} }, - { "windows-1256", "CP 1256", 2256, - { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, - 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688, - 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA, - 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F, - 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, - 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, - 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7, - 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643, - 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7, - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF, - 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7, - 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} }, - { "windows-1257", "CP 1257", 2257, - { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, - 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 
0x00A8, 0x02C7, 0x00B8, - 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD, - 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7, - 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, - 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, - 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, - 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, - 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, - 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, - 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, - 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, - 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} }, - { "windows-1258", "CP 1258", 2258, - { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, - 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, - 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, - 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7, - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF, - 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7, - 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF, - 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7, - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF, - 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7, - 0x00F8, 0x00F9, 
0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} }, - - { "Apple Roman", "Apple Roman", 0, - { 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, - 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, - 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, - 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, - 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, - 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, - 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, - 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, - 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, - 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, - 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, - 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, - 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, - 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, - 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, - 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7} }, - - - - // This one is based on the charmap file - // /usr/share/i18n/charmaps/SAMI-WS2.gz, which is manually adapted - // to this format by Børre Gaup <boerre@subdimension.com> - { "WINSAMI2", "WS2", 0, - { 0x20AC, 0xFFFD, 0x010C, 0x0192, 0x010D, 0x01B7, 0x0292, 0x01EE, - 0x01EF, 0x0110, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0x0111, 0x01E6, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, - 0x00A0, 0x01E7, 0x01E4, 0x00A3, 0x00A4, 0x01E5, 0x00A6, 0x00A7, - 0x00A8, 0x00A9, 0x021E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x021F, - 0x00B0, 0x00B1, 0x01E8, 0x01E9, 0x00B4, 0x00B5, 0x00B6, 0x00B7, - 0x014A, 0x014B, 0x0166, 0x00BB, 0x0167, 0x00BD, 0x017D, 0x017E, - 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 
0x00CD, 0x00CE, 0x00CF, - 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, - 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, - 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, - 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, - 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, - - - // this one is generated from the charmap file located in /usr/share/i18n/charmaps - // on most Linux distributions. The thai character set tis620 is byte by byte equivalent - // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too. - - // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620 - { "TIS-620", "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent) - { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, - 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, - 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, - 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, - 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, - 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, - 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, - 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, - 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, - 0x0E40, 
      0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
      0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
      0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
      0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD } },

    /*
      Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
      MIBenum: 2004
      Source: LaserJet IIP Printer User's Manual,
      HP part no 33471-90901, Hewlet-Packard, June 1989.
      Alias: roman8
      Alias: r8
      Alias: csHPRoman8
    */
    { "Roman8", "HP-Roman8", 2004,
      { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
        0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
        0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
        0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
        0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF,
        0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4,
        0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1,
        0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2,
        0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA,
        0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC,
        0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6,
        0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4,
        0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3,
        0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF,
        0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC,
        0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD } }

    // if you add more character sets at the end, change LAST_MIB above
};

// Creates a codec for row i of the table that ends above: i is stored
// as forwardIndex, which the other members use to index tqunicodevalues.
// Outside TQ_WS_TQWS builds the reverse (Unicode -> byte) map pointer is
// cleared here, so the map is built lazily by the first encoding call.
TQSimpleTextCodec::TQSimpleTextCodec( int i )
    : TQTextCodec(), forwardIndex( i )
{
#ifndef TQ_WS_TQWS
    reverseMap = 0;
#endif
}


// Releases the reverse map.
// In TQ_WS_TQWS builds only the instance recorded in reverseOwner deletes
// the map, and it resets both reverseMap and reverseOwner afterwards.
// NOTE(review): this suggests reverseMap/reverseOwner are shared between
// instances in that configuration - confirm against the class declaration.
TQSimpleTextCodec::~TQSimpleTextCodec()
{
#ifndef TQ_WS_TQWS
    delete reverseMap;
#else
    if ( reverseOwner == this ) {
        delete reverseMap;
        reverseMap = 0;
        reverseOwner = 0;
    }
#endif
}

// Return type of TQSimpleTextCodec::buildReverseMap(); the function's name
// and body follow immediately.
void
TQSimpleTextCodec::buildReverseMap() -{ -#ifdef TQ_WS_TQWS - if ( reverseOwner != this ) { - int m = 0; - int i = 0; - while( i < 128 ) { - if ( tqunicodevalues[forwardIndex].values[i] > m && - tqunicodevalues[forwardIndex].values[i] < 0xfffd ) - m = tqunicodevalues[forwardIndex].values[i]; - i++; - } - m++; - if ( !reverseMap ) - reverseMap = new TQMemArray<unsigned char>( m ); - if ( m > (int)(reverseMap->size()) ) - reverseMap->resize( m ); - for( i = 0; i < 128 && i < m; i++ ) - (*reverseMap)[i] = (char)i; - for( ;i < m; i++ ) - (*reverseMap)[i] = 0; - for( i=128; i<256; i++ ) { - int u = tqunicodevalues[forwardIndex].values[i-128]; - if ( u < m ) - (*reverseMap)[u] = (char)(unsigned char)(i); - } - reverseOwner = this; - } -#else - if ( !reverseMap ) { - TQMemArray<unsigned char> **map = &((TQSimpleTextCodec *)this)->reverseMap; - int m = 0; - int i = 0; - while( i < 128 ) { - if ( tqunicodevalues[forwardIndex].values[i] > m && - tqunicodevalues[forwardIndex].values[i] < 0xfffd ) - m = tqunicodevalues[forwardIndex].values[i]; - i++; - } - m++; - *map = new TQMemArray<unsigned char>( m ); - for( i = 0; i < 128 && i < m; i++ ) - (**map)[i] = (char)i; - for( ;i < m; i++ ) - (**map)[i] = 0; - for( i=128; i<256; i++ ) { - int u = tqunicodevalues[forwardIndex].values[i-128]; - if ( u < m ) - (**map)[u] = (char)(unsigned char)(i); - } - } -#endif -} - -TQString TQSimpleTextCodec::toUnicode(const char* chars, int len) const -{ - if ( len <= 0 || chars == 0 ) - return TQString::null; - - const unsigned char * c = (const unsigned char *)chars; - int i; - - for ( i = 0; i < len; i++ ) - if ( c[i] == '\0' ) { - len = i; - break; - } - - TQString r; - r.setUnicode(0, len); - TQChar* uc = (TQChar*)r.tqunicode(); // const_cast - - for ( i = 0; i < len; i++ ) { - if ( c[i] > 127 ) - uc[i] = tqunicodevalues[forwardIndex].values[c[i]-128]; - else - uc[i] = c[i]; - } - return r; -} - - -TQCString TQSimpleTextCodec::fromUnicode(const TQString& uc, int& len ) const -{ -#ifdef 
TQ_WS_TQWS - if ( this != reverseOwner ) -#else - if ( !reverseMap ) -#endif - ((TQSimpleTextCodec *)this)->buildReverseMap(); - - if ( len <0 || len > (int)uc.length() ) - len = uc.length(); - TQCString r( len+1 ); - int i = len; - int u; - const TQChar* ucp = uc.tqunicode(); - unsigned char* rp = (unsigned char *)r.data(); - unsigned char* rmp = reverseMap->data(); - int rmsize = (int) reverseMap->size(); - while( i-- ) - { - u = ucp->tqunicode(); - *rp = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' ); - if ( *rp == 0 ) *rp = '?'; - rp++; - ucp++; - } - r[len] = 0; - return r; -} - -void TQSimpleTextCodec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const -{ -#ifdef TQ_WS_TQWS - if ( this != reverseOwner ) -#else - if ( !reverseMap ) -#endif - ((TQSimpleTextCodec *)this)->buildReverseMap(); - - unsigned char* rmp = reverseMap->data(); - int rmsize = (int) reverseMap->size(); - while ( length-- ) { - unsigned short u = in->tqunicode(); - *out = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 ); - ++in; - ++out; - } -} - -unsigned short TQSimpleTextCodec::characterFromUnicode(const TQString &str, int pos) const -{ -#ifdef TQ_WS_TQWS - if ( this != reverseOwner ) -#else - if ( !reverseMap ) -#endif - ((TQSimpleTextCodec *)this)->buildReverseMap(); - - unsigned short u = str[pos].tqunicode(); - unsigned char* rmp = reverseMap->data(); - int rmsize = (int) reverseMap->size(); - return u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 ); -} - -bool TQSimpleTextCodec::canEncode( TQChar ch ) const -{ -#ifdef TQ_WS_TQWS - if ( this != reverseOwner ) -#else - if ( !reverseMap ) -#endif - ((TQSimpleTextCodec *)this)->buildReverseMap(); - - unsigned short u = ch.tqunicode(); - unsigned char* rmp = reverseMap->data(); - int rmsize = (int) reverseMap->size(); - return u < 128 ? TRUE : (( u < rmsize ) ? 
(*(rmp+u) != 0) : FALSE ); -} - -const char* TQSimpleTextCodec::name() const -{ - return tqunicodevalues[forwardIndex].cs; -} - -const char* TQSimpleTextCodec::mimeName() const -{ - return tqunicodevalues[forwardIndex].mime; -} - - -int TQSimpleTextCodec::mibEnum() const -{ - return tqunicodevalues[forwardIndex].mib; -} - -int TQSimpleTextCodec::heuristicNameMatch(const char* hint) const -{ - if ( qstricmp( hint, mimeName() ) == 0 ) - return 10000; // return a large value - if ( hint[0]=='k' ) { - TQCString lhint = TQCString(hint).lower(); - // Help people with messy fonts - if ( lhint == "koi8-1" ) - return TQTextCodec::heuristicNameMatch("koi8-r")-1; - if ( lhint == "koi8-ru" ) - return TQTextCodec::heuristicNameMatch("koi8-r")-1; - } else if ( hint[0] == 't' && mibEnum() == 2259 /* iso8859-11 */ ) { - // 8859-11 and tis620 are byte by byte equivalent - int i = simpleHeuristicNameMatch("tis620-0", hint); - if( !i ) - i = simpleHeuristicNameMatch("tis-620", hint); - if( i ) return i; - } else if ( mibEnum() == 82 /* ISO 8859-6 */ ) { - int i = simpleHeuristicNameMatch("ISO 8859-6-I", hint); - if ( i ) - return i; - } - return TQTextCodec::heuristicNameMatch(hint); -} - -int TQSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const -{ - if ( len<1 || !chars ) - return -1; - int i = 0; - const uchar * c = (const unsigned char *)chars; - int r = 0; - while( i<len && c && *c ) { - if ( *c >= 128 ) { - if ( tqunicodevalues[forwardIndex].values[(*c)-128] == 0xfffd ) - return -1; - } - if ( (*c >= ' ' && *c < 127) || - *c == '\n' || *c == '\t' || *c == '\r' ) - r++; - i++; - c++; - } - if ( mibEnum()==4 ) - r+=1; - return r; -} - -#endif - -class TQLatin1Codec : public TQTextCodec -{ -public: -#if !defined(TQ_NO_USING_KEYWORD) - using TQTextCodec::fromUnicode; - using TQTextCodec::toUnicode; -#endif - TQString toUnicode(const char* chars, int len) const; - TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; - void fromUnicode( const TQChar 
*in, unsigned short *out, int length ) const; - unsigned short characterFromUnicode(const TQString &str, int pos) const; - - const char* name() const; - const char* mimeName() const; - int mibEnum() const; - - int heuristicContentMatch(const char* chars, int len) const; - -private: - int forwardIndex; -}; - - -TQString TQLatin1Codec::toUnicode(const char* chars, int len) const -{ - if ( chars == 0 ) - return TQString::null; - - return TQString::tqfromLatin1(chars, len); -} - - -TQCString TQLatin1Codec::fromUnicode(const TQString& uc, int& len ) const -{ - if ( len <0 || len > (int)uc.length() ) - len = uc.length(); - TQCString r( len+1 ); - char *d = r.data(); - int i = 0; - const TQChar *ch = uc.tqunicode(); - while ( i < len ) { - d[i] = ch->row() ? '?' : ch->cell(); - i++; - ch++; - } - r[len] = 0; - return r; -} - -void TQLatin1Codec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const -{ - while ( length-- ) { - *out = in->row() ? 0 : in->cell(); - ++in; - ++out; - } -} - -unsigned short TQLatin1Codec::characterFromUnicode(const TQString &str, int pos) const -{ - const TQChar *ch = str.tqunicode() + pos; - if (ch->row()) - return 0; - return (unsigned short) ch->cell(); -} - - -const char* TQLatin1Codec::name() const -{ - return "ISO 8859-1"; -} - -const char* TQLatin1Codec::mimeName() const -{ - return "ISO-8859-1"; -} - - -int TQLatin1Codec::mibEnum() const -{ - return 4; -} - -int TQLatin1Codec::heuristicContentMatch(const char* chars, int len) const -{ - if ( len<1 || !chars ) - return -1; - int i = 0; - const uchar * c = (const unsigned char *)chars; - int r = 0; - while( i<len && c && *c ) { - if ( *c >= 0x80 && *c < 0xa0 ) - return -1; - if ( (*c >= ' ' && *c < 127) || - *c == '\n' || *c == '\t' || *c == '\r' ) - r++; - i++; - c++; - } - if ( this == (const TQTextCodec *)codecForLocale() ) - r += 5; - return r; -} - -class TQLatin15Codec: public TQLatin1Codec -{ -public: - TQString toUnicode(const char* chars, int len) const; -#if 
!defined(TQ_NO_USING_KEYWORD) - using TQLatin1Codec::fromUnicode; -#endif - TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; - void fromUnicode( const TQChar *in, unsigned short *out, int length ) const; - unsigned short characterFromUnicode(const TQString &str, int pos) const; - - const char* name() const; - const char* mimeName() const; - int mibEnum() const; - -private: - int forwardIndex; -}; - - -TQString TQLatin15Codec::toUnicode(const char* chars, int len) const -{ - if ( chars == 0 ) - return TQString::null; - - TQString str = TQString::tqfromLatin1(chars, len); - TQChar *uc = (TQChar *)str.tqunicode(); - while( len-- ) { - switch( uc->tqunicode() ) { - case 0xa4: - *uc = 0x20ac; - break; - case 0xa6: - *uc = 0x0160; - break; - case 0xa8: - *uc = 0x0161; - break; - case 0xb4: - *uc = 0x017d; - break; - case 0xb8: - *uc = 0x017e; - break; - case 0xbc: - *uc = 0x0152; - break; - case 0xbd: - *uc = 0x0153; - break; - case 0xbe: - *uc = 0x0178; - break; - default: - break; - } - uc++; - } - return str; -} - -static inline unsigned char -latin15CharFromUnicode( unsigned short uc, bool replacement = TRUE ) -{ - uchar c; - if ( uc < 0x0100 ) { - if ( uc > 0xa3 && uc < 0xbf ) { - switch( uc ) { - case 0xa4: - case 0xa6: - case 0xa8: - case 0xb4: - case 0xb8: - case 0xbc: - case 0xbd: - case 0xbe: - c = replacement ? '?' : 0; - break; - default: - c = (unsigned char) uc; - break; - } - } else { - c = (unsigned char) uc; - } - } else { - if ( uc == 0x20ac ) - c = 0xa4; - else if ( (uc & 0xff00) == 0x0100 ) { - switch( uc ) { - case 0x0160: - c = 0xa6; - break; - case 0x0161: - c = 0xa8; - break; - case 0x017d: - c = 0xb4; - break; - case 0x017e: - c = 0xb8; - break; - case 0x0152: - c = 0xbc; - break; - case 0x0153: - c = 0xbd; - break; - case 0x0178: - c = 0xbe; - break; - default: - c = replacement ? '?' : 0; - } - } else { - c = replacement ? '?' 
: 0; - } - } - return c; -} - - -void TQLatin15Codec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const -{ - while ( length-- ) { - *out = latin15CharFromUnicode( in->tqunicode(), FALSE ); - ++in; - ++out; - } -} - - -TQCString TQLatin15Codec::fromUnicode(const TQString& uc, int& len ) const -{ - if ( len <0 || len > (int)uc.length() ) - len = uc.length(); - TQCString r( len+1 ); - char *d = r.data(); - int i = 0; - const TQChar *ch = uc.tqunicode(); - while ( i < len ) { - d[i] = latin15CharFromUnicode( ch->tqunicode() ); - i++; - ch++; - } - r[len] = 0; - return r; -} - -unsigned short TQLatin15Codec::characterFromUnicode(const TQString &str, int pos) const -{ - return latin15CharFromUnicode( str.tqunicode()[pos].tqunicode(), FALSE ); -} - - -const char* TQLatin15Codec::name() const -{ - return "ISO 8859-15"; -} - -const char* TQLatin15Codec::mimeName() const -{ - return "ISO-8859-15"; -} - - -int TQLatin15Codec::mibEnum() const -{ - return 111; -} - -static TQTextCodec *checkForCodec(const char *name) { - TQTextCodec *c = TQTextCodec::codecForName(name); - if (!c) { - const char *at = strchr(name, '@'); - if (at) { - TQCString n(name, at - name + 1); - c = TQTextCodec::codecForName(n.data()); - } - } - return c; -} - -/* the next two functions are implicitely thread safe, - as they are only called by setup() which uses a mutex. -*/ -static void setupLocaleMapper() -{ -#ifdef TQ_OS_WIN32 - localeMapper = TQTextCodec::codecForName( "System" ); -#else - -#if defined (_XOPEN_UNIX) && !defined(TQ_OS_TQNX6) && !defined(TQ_OS_OSF) && !defined(TQ_OS_MAC) - char *charset = nl_langinfo (CODESET); - if ( charset ) - localeMapper = TQTextCodec::codecForName( charset ); -#endif - - if ( !localeMapper ) { - // Very poorly defined and followed standards causes lots of code - // to try to get all the cases... - - // Try to determine locale codeset from locale name assigned to - // LC_CTYPE category. - - // First part is getting that locale name. 
First try setlocale() which - // definitely knows it, but since we cannot fully trust it, get ready - // to fall back to environment variables. - char * ctype = qstrdup( setlocale( LC_CTYPE, 0 ) ); - - // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG - // environment variables. - char * lang = qstrdup( getenv("LC_ALL") ); - if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) { - if ( lang ) delete [] lang; - lang = qstrdup( getenv("LC_CTYPE") ); - } - if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) { - if ( lang ) delete [] lang; - lang = qstrdup( getenv("LANG") ); - } - - // Now try these in order: - // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) - // 2. CODESET from lang if it contains a .CODESET part - // 3. ctype (maybe the locale is named "ISO-8859-1" or something) - // 4. locale (ditto) - // 5. check for "@euro" - // 6. guess locale from ctype unless ctype is "C" - // 7. guess locale from lang - - // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) - char * codeset = ctype ? strchr( ctype, '.' ) : 0; - if ( codeset && *codeset == '.' ) - localeMapper = checkForCodec( codeset + 1 ); - - // 2. CODESET from lang if it contains a .CODESET part - codeset = lang ? strchr( lang, '.' ) : 0; - if ( !localeMapper && codeset && *codeset == '.' ) - localeMapper = checkForCodec( codeset + 1 ); - - // 3. ctype (maybe the locale is named "ISO-8859-1" or something) - if ( !localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0 ) - localeMapper = checkForCodec( ctype ); - - // 4. locale (ditto) - if ( !localeMapper && lang && *lang != 0 ) - localeMapper = checkForCodec( lang ); - - // 5. "@euro" - if ( !localeMapper && ctype && strstr( ctype, "@euro" ) || lang && strstr( lang, "@euro" ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-15" ); - - // 6. guess locale from ctype unless ctype is "C" - // 7. 
guess locale from lang - char * try_by_name = ctype; - if ( ctype && *ctype != 0 && strcmp (ctype, "C") != 0 ) - try_by_name = lang; - - // Now do the guessing. - if ( lang && *lang && !localeMapper && try_by_name && *try_by_name ) { - if ( try_locale_list( iso8859_15locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-15" ); - else if ( try_locale_list( iso8859_2locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-2" ); - else if ( try_locale_list( iso8859_3locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-3" ); - else if ( try_locale_list( iso8859_4locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-4" ); - else if ( try_locale_list( iso8859_5locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-5" ); - else if ( try_locale_list( iso8859_6locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-6" ); - else if ( try_locale_list( iso8859_7locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-7" ); - else if ( try_locale_list( iso8859_8locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-8-I" ); - else if ( try_locale_list( iso8859_9locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-9" ); - else if ( try_locale_list( iso8859_13locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-13" ); - else if ( try_locale_list( tis_620locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-11" ); - else if ( try_locale_list( koi8_ulocales, lang ) ) - localeMapper = TQTextCodec::codecForName( "KOI8-U" ); - else if ( try_locale_list( cp_1251locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "CP 1251" ); - else if ( try_locale_list( pt_154locales, lang ) ) - localeMapper = TQTextCodec::codecForName( "PT 154" ); - else if ( try_locale_list( probably_koi8_rlocales, lang ) ) - localeMapper = ru_RU_hack( lang ); - } - - delete [] ctype; - delete [] lang; - } - if ( 
localeMapper && localeMapper->mibEnum() == 11 ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-8-I" ); - - // If everything failed, we default to 8859-1 - // We could perhaps default to 8859-15. - if ( !localeMapper ) - localeMapper = TQTextCodec::codecForName( "ISO 8859-1" ); -#endif -} - - -static void realSetup() -{ -#if defined(TQT_CHECK_STATE) - if ( destroying_is_ok ) - qWarning( "TQTextCodec: creating new codec during codec cleanup!" ); -#endif - all = new TQValueList<TQTextCodec*>; - - (void)new TQLatin1Codec; - (void)new TQLatin15Codec; - (void)new TQUtf8Codec; - (void)new TQUtf16Codec; - -#ifndef TQT_NO_CODECS - int i = 0; - do { - (void)new TQSimpleTextCodec( i ); - } while( tqunicodevalues[i++].mib != LAST_MIB ); - - (void)new TQTsciiCodec; - - for (i = 0; i < 9; ++i) { - (void)new TQIsciiCodec(i); - } -#endif // TQT_NO_CODECS -#ifndef TQT_NO_CODEC_HEBREW - (void)new TQHebrewCodec; -#endif -#ifndef TQT_NO_BIG_CODECS - (void)new TQBig5Codec; - (void)new TQBig5hkscsCodec; - (void)new TQEucJpCodec; - (void)new TQEucKrCodec; - (void)new TQGb2312Codec; - (void)new TQGbkCodec; - (void)new TQGb18030Codec; - (void)new TQJisCodec; - (void)new TQSjisCodec; -#endif // TQT_NO_BIG_CODECS - -#ifdef TQ_OS_WIN32 - (void) new TQWindowsLocalCodec; -#endif // TQ_OS_WIN32 - - if ( !localeMapper ) - setupLocaleMapper(); -} - -void TQTextCodec::fromUnicodeInternal( const TQChar *in, unsigned short *out, int length ) -{ - switch( mibEnum() ) { -#ifndef TQT_NO_CODECS - case 2084: - case 2088: - case 5: - case 6: - case 7: - case 8: - case 82: - case 10: - case 85: - case 12: - case 13: - case 109: - case 110: - case 2004: - case 2009: - case 2086: - case 2250: - case 2251: - case 2252: - case 2253: - case 2254: - case 2255: - case 2256: - case 2257: - case 2258: - case 2259: - ((TQSimpleTextCodec *)this)->fromUnicode( in, out, length ); - break; - -#if !defined(TQT_NO_BIG_CODECS) && defined(TQ_WS_X11) - // the TQFont*Codecs are only used on X11 - - case 15: - 
((TQFontJis0201Codec *) this)->fromUnicode( in, out, length ); - break; - - case 63: - ((TQFontJis0208Codec *) this)->fromUnicode( in, out, length ); - break; - - case 36: - ((TQFontKsc5601Codec *) this)->fromUnicode( in, out, length ); - break; - - case 57: - ((TQFontGb2312Codec *) this)->fromUnicode( in, out, length ); - break; - - case -113: - ((TQFontGbkCodec *) this)->fromUnicode( in, out, length ); - break; - - case -114: - ((TQFontGb18030_0Codec *) this)->fromUnicode( in, out, length ); - break; - - case -2026: - ((TQFontBig5Codec *) this)->fromUnicode( in, out, length ); - break; - - case -2101: - ((TQFontBig5hkscsCodec *) this)->fromUnicode( in, out, length ); - break; - - case -4242: - ((TQFontLaoCodec *) this)->fromUnicode( in, out, length ); - break; -#endif -#endif // TQT_NO_CODECS - - case 4: - ((TQLatin1Codec *) this)->fromUnicode( in, out, length ); - break; - - case 111: - ((TQLatin15Codec *) this)->fromUnicode( in, out, length ); - break; - - default: - { - TQConstString string( in, length ); - TQString str = string.string(); - for ( int i = 0; i < length; i++ ) - out[i] = characterFromUnicode( str, i ); - } - } -} - - -/*! - \fn TQTextCodec* TQTextCodec::codecForTr() - - Returns the codec used by TQObject::tr() on its argument. If this - function returns 0 (the default), tr() assumes Latin-1. - - \sa setCodecForTr() -*/ - -/*! - \fn void TQTextCodec::setCodecForTr(TQTextCodec *c) - \nonreentrant - - Sets the codec used by TQObject::tr() on its argument to \a c. If - \a c is 0 (the default), tr() assumes Latin-1. - - If the literal quoted text in the program is not in the Latin-1 - encoding, this function can be used to set the appropriate - encoding. For example, software developed by Korean programmers - might use eucKR for all the text in the program, in which case the - main() function might look like this: - - \code - int main(int argc, char** argv) - { - TQApplication app(argc, argv); - ... install any additional codecs ... 
- TQTextCodec::setCodecForTr( TQTextCodec::codecForName("eucKR") ); - ... - } - \endcode - - Note that this is not the way to select the encoding that the \e - user has chosen. For example, to convert an application containing - literal English strings to Korean, all that is needed is for the - English strings to be passed through tr() and for translation - files to be loaded. For details of internationalization, see the - \link i18n.html TQt internationalization documentation\endlink. - - \sa codecForTr(), setCodecForCStrings() -*/ - - -/*! - \fn TQTextCodec* TQTextCodec::codecForCStrings() - - Returns the codec used by TQString to convert to and from const - char* and TQCStrings. If this function returns 0 (the default), - TQString assumes Latin-1. - - \sa setCodecForCStrings() -*/ - -/*! - \fn void TQTextCodec::setCodecForCStrings(TQTextCodec *c) - \nonreentrant - - Sets the codec used by TQString to convert to and from const char* - and TQCStrings. If \a c is 0 (the default), TQString assumes Latin-1. - - \warning Some codecs do not preserve the characters in the ASCII - range (0x00 to 0x7f). For example, the Japanese Shift-JIS - encoding maps the backslash character (0x5c) to the Yen character. - This leads to unexpected results when using the backslash - character to escape characters in strings used in e.g. regular - expressions. Use TQString::tqfromLatin1() to preserve characters in - the ASCII range when needed. - - \sa codecForCStrings(), setCodecForTr() -*/ - - -TQTextCodec *TQTextCodec::cftr = 0; -TQTextCodec *TQTextCodec::cfcs = 0; - - -#endif // TQT_NO_TEXTCODEC |