/**************************************************************************** ** ** Implementation of TQTextCodec class ** ** Created : 981015 ** ** Copyright (C) 1998-2008 Trolltech ASA. All rights reserved. ** ** This file is part of the tools module of the TQt GUI Toolkit. ** ** This file may be used under the terms of the GNU General ** Public License versions 2.0 or 3.0 as published by the Free ** Software Foundation and appearing in the files LICENSE.GPL2 ** and LICENSE.GPL3 included in the packaging of this file. ** Alternatively you may (at your option) use any later version ** of the GNU General Public License if such license has been ** publicly approved by Trolltech ASA (or its successors, if any) ** and the KDE Free TQt Foundation. ** ** Please review the following information to ensure GNU General ** Public Licensing requirements will be met: ** http://trolltech.com/products/qt/licenses/licensing/opensource/. ** If you are unsure which license is appropriate for your use, please ** review the following information: ** http://trolltech.com/products/qt/licenses/licensing/licensingoverview ** or contact the sales department at sales@trolltech.com. ** ** This file may be used under the terms of the Q Public License as ** defined by Trolltech ASA and appearing in the file LICENSE.TQPL ** included in the packaging of this file. Licensees holding valid TQt ** Commercial licenses may use this file in accordance with the TQt ** Commercial License Agreement provided with the Software. ** ** This file is provided "AS IS" with NO WARRANTY OF ANY KIND, ** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR ** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted ** herein. 
** **********************************************************************/ #include "qplatformdefs.h" // UNIX Large File Support redefines open -> open64 #if defined(open) # undef open #endif #include "tqtextcodec.h" #ifndef TQT_NO_TEXTCODEC #include "tqvaluelist.h" #include "tqtextcodecfactory.h" #include "tqutfcodec.h" #include "ntqnamespace.h" #ifndef TQT_NO_CODECS #include "tqhebrewcodec.h" #include "tqtsciicodec.h" #include "tqisciicodec_p.h" #endif // TQT_NO_CODECS #ifndef TQT_NO_BIG_CODECS #include "tqbig5codec.h" #include "tqeucjpcodec.h" #include "tqeuckrcodec.h" #include "tqgb18030codec.h" #include "tqjiscodec.h" #include "tqjpunicode.h" #include "tqsjiscodec.h" #endif // TQT_NO_BIG_CODECS #include "tqfile.h" #include "tqstrlist.h" #include "tqstring.h" #include "../tools/qlocale_p.h" #if !defined(TQT_NO_CODECS) && !defined(TQT_NO_BIG_CODECS) && defined(TQ_WS_X11) # include "tqfontcodecs_p.h" #endif #ifdef TQT_THREAD_SUPPORT # include #endif // TQT_THREAD_SUPPORT #include #include #ifndef Q_OS_TEMP #include #endif #if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX6) #include #endif static TQValueList *all = 0; static bool codecs_destroyed = false; static TQTextCodec * localeMapper = 0; class TQTextCodecCleanup { public: ~TQTextCodecCleanup() { TQTextCodec::deleteAllCodecs(); } }; static TQTextCodecCleanup tqtextcodec_cleanup; /*! Deletes all the created codecs. \warning Do not call this function. TQApplication calls this function just before exiting to delete any TQTextCodec objects that may be lying around. Since various other classes hold pointers to TQTextCodec objects, it is not safe to call this function earlier. If you are using the utility classes (like TQString) but not using TQApplication, calling this function at the very end of your application may be helpful for chasing down memory leaks by eliminating any TQTextCodec objects. 
*/ void TQTextCodec::deleteAllCodecs() { codecs_destroyed = true; if ( !all ) return; #ifdef TQT_THREAD_SUPPORT TQMutexLocker locker( tqt_global_mutexpool ? tqt_global_mutexpool->get( &all ) : 0 ); #endif // TQT_THREAD_SUPPORT TQValueList *ball = all; all = 0; TQValueList::Iterator it; for ( it = ball->begin(); it != ball->end(); ++it ) { delete *it; *it = 0; } ball->clear(); delete ball; localeMapper = 0; } static void setup(); class TQTextStatelessEncoder: public TQTextEncoder { const TQTextCodec* codec; public: TQTextStatelessEncoder(const TQTextCodec*); TQCString fromUnicode(const TQString& uc, int& lenInOut); }; class TQTextStatelessDecoder : public TQTextDecoder { const TQTextCodec* codec; public: TQTextStatelessDecoder(const TQTextCodec*); TQString toUnicode(const char* chars, int len); }; TQTextStatelessEncoder::TQTextStatelessEncoder(const TQTextCodec* c) : codec(c) { } TQCString TQTextStatelessEncoder::fromUnicode(const TQString& uc, int& lenInOut) { return codec->fromUnicode(uc,lenInOut); } TQTextStatelessDecoder::TQTextStatelessDecoder(const TQTextCodec* c) : codec(c) { } TQString TQTextStatelessDecoder::toUnicode(const char* chars, int len) { return codec->toUnicode(chars,len); } /*! \class TQTextCodec tqtextcodec.h \brief The TQTextCodec class provides conversion between text encodings. \reentrant \ingroup i18n TQt uses Unicode to store, draw and manipulate strings. In many situations you may wish to deal with data that uses a different encoding. For example, most Japanese documents are still stored in Shift-JIS or ISO2022, while Russian users often have their documents in KOI8-R or CP1251. TQt provides a set of TQTextCodec classes to help with converting non-Unicode formats to and from Unicode. You can also create your own codec classes (\link #subclassing see later\endlink). 
The supported encodings are: \list \i Latin1 \i Big5 -- Chinese \i Big5-HKSCS -- Chinese \i eucJP -- Japanese \i eucKR -- Korean \i GB2312 -- Chinese \i GBK -- Chinese \i GB18030 -- Chinese \i JIS7 -- Japanese \i Shift-JIS -- Japanese \i TSCII -- Tamil \i utf8 -- Unicode, 8-bit \i utf16 -- Unicode \i KOI8-R -- Russian \i KOI8-U -- Ukrainian \i ISO8859-1 -- Western \i ISO8859-2 -- Central European \i ISO8859-3 -- Central European \i ISO8859-4 -- Baltic \i ISO8859-5 -- Cyrillic \i ISO8859-6 -- Arabic \i ISO8859-7 -- Greek \i ISO8859-8 -- Hebrew, visually ordered \i ISO8859-8-i -- Hebrew, logically ordered \i ISO8859-9 -- Turkish \i ISO8859-10 \i ISO8859-13 \i ISO8859-14 \i ISO8859-15 -- Western \i IBM 850 \i IBM 866 \i CP874 \i CP1250 -- Central European \i CP1251 -- Cyrillic \i CP1252 -- Western \i CP1253 -- Greek \i CP1254 -- Turkish \i CP1255 -- Hebrew \i CP1256 -- Arabic \i CP1257 -- Baltic \i CP1258 \i Apple Roman \i TIS-620 -- Thai \endlist TQTextCodecs can be used as follows to convert some locally encoded string to Unicode. Suppose you have some string encoded in Russian KOI8-R encoding, and want to convert it to Unicode. The simple way to do this is: \code TQCString locallyEncoded = "..."; // text to convert TQTextCodec *codec = TQTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R TQString unicodeString = codec->toUnicode( locallyEncoded ); \endcode After this, \c{unicodeString} holds the text converted to Unicode. Converting a string from Unicode to the local encoding is just as easy: \code TQString unicodeString = "..."; // any Unicode text TQTextCodec *codec = TQTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R TQCString locallyEncoded = codec->fromUnicode( unicodeString ); \endcode Some care must be taken when trying to convert the data in chunks, for example, when receiving it over a network. In such cases it is possible that a multi-byte character will be split over two chunks. 
At best this might result in the loss of a character and at worst cause the entire conversion to fail. The approach to use in these situations is to create a TQTextDecoder object for the codec and use this TQTextDecoder for the whole decoding process, as shown below: \code TQTextCodec *codec = TQTextCodec::codecForName( "Shift-JIS" ); TQTextDecoder *decoder = codec->makeDecoder(); TQString unicodeString; while( receiving_data ) { TQByteArray chunk = new_data; unicodeString += decoder->toUnicode( chunk.data(), chunk.length() ); } \endcode The TQTextDecoder object maintains state between chunks and therefore works correctly even if a multi-byte character is split between chunks. \target subclassing \section1 Creating your own Codec class Support for new text encodings can be added to TQt by creating TQTextCodec subclasses. Built-in codecs can be overridden by custom codecs since more recently created TQTextCodec objects take precedence over earlier ones. You may find it more convenient to make your codec class available as a plugin; see the \link plugins-howto.html plugin documentation\endlink for more details. The abstract virtual functions describe the encoder to the system and the coder is used as required in the different text file formats supported by TQTextStream, and under X11, for the locale-specific character input and output. To add support for another 8-bit encoding to TQt, make a subclass of TQTextCodec and implement at least the following methods: \code const char* name() const \endcode Return the official name for the encoding. \code int mibEnum() const \endcode Return the MIB enum for the encoding if it is listed in the \link http://www.iana.org/assignments/character-sets IANA character-sets encoding file\endlink. If the encoding is multi-byte then it will have "state"; that is, the interpretation of some bytes will be dependent on some preceding bytes. 
For such encodings, you must implement: \code TQTextDecoder* makeDecoder() const \endcode Return a TQTextDecoder that remembers incomplete multi-byte sequence prefixes or other required state. If the encoding does \e not require state, you should implement: \code TQString toUnicode(const char* chars, int len) const \endcode Converts \e len characters from \e chars to Unicode. The base TQTextCodec class has default implementations of the above two functions, \e{but they are mutually recursive}, so you must re-implement at least one of them, or both for improved efficiency. For conversion from Unicode to 8-bit encodings, it is rarely necessary to maintain state. However, two functions similar to the two above are used for encoding: \code TQTextEncoder* makeEncoder() const \endcode Return a TQTextEncoder. \code TQCString fromUnicode(const TQString& uc, int& lenInOut ) const \endcode Converts \e lenInOut characters (of type TQChar) from the start of the string \e uc, returning a TQCString result, and also returning the \link TQCString::length() length\endlink of the result in \e lenInOut. Again, these are mutually recursive so only one needs to be implemented, or both if greater efficiency is possible. Finally, you must implement: \code int heuristicContentMatch(const char* chars, int len) const \endcode Gives a value indicating how likely it is that \e len characters from \e chars are in the encoding. A good model for this function is the TQWindowsLocalCodec::heuristicContentMatch function found in the TQt sources. A TQTextCodec subclass might have improved performance if you also re-implement: \code bool canEncode( TQChar ) const \endcode Test if a Unicode character can be encoded. \code bool canEncode( const TQString& ) const \endcode Test if a string of Unicode characters can be encoded. \code int heuristicNameMatch(const char* hint) const \endcode Test if a possibly non-standard name is referring to the codec. 
Codecs can also be created as \link plugins-howto.html plugins\endlink. */ /*! \nonreentrant Constructs a TQTextCodec, and gives it the highest precedence. The TQTextCodec should always be constructed on the heap (i.e. with \c new). TQt takes ownership and will delete it when the application terminates. */ TQTextCodec::TQTextCodec() { // 'codecs_destroyed' should never be true at this point if (!codecs_destroyed) { setup(); #ifdef TQT_THREAD_SUPPORT TQMutexLocker locker( tqt_global_mutexpool ? tqt_global_mutexpool->get( &all ) : 0 ); #endif // TQT_THREAD_SUPPORT all->insert( all->begin(), this ); } } /*! \nonreentrant Destroys the TQTextCodec. Note that you should not delete codecs yourself: once created they become TQt's responsibility. */ TQTextCodec::~TQTextCodec() { if ( !codecs_destroyed ) tqWarning("TQTextCodec::~TQTextCodec() called by application"); #ifdef TQT_THREAD_SUPPORT TQMutexLocker locker( tqt_global_mutexpool ? tqt_global_mutexpool->get( &all ) : 0 ); #endif // TQT_THREAD_SUPPORT if ( all ) all->remove( this ); } /*! Returns a value indicating how likely it is that this decoder is appropriate for decoding some format that has the given name. The name is compared with the \a hint. A good match returns a positive number around the length of the string. A bad match is negative. The default implementation calls simpleHeuristicNameMatch() with the name of the codec. */ int TQTextCodec::heuristicNameMatch(const char* hint) const { return simpleHeuristicNameMatch(name(),hint); } // returns a string containing the letters and numbers from input, // with a space separating run of a character class. e.g. 
"iso8859-1" // becomes "iso 8859 1" static TQString lettersAndNumbers( const char * input ) { TQString result; TQChar c; while( input && *input ) { c = *input; if ( c.isLetter() || c.isNumber() ) result += c.lower(); if ( input[1] ) { // add space at character class transition, except // transition from upper-case to lower-case letter TQChar n( input[1] ); if ( c.isLetter() && n.isLetter() ) { if ( c == c.lower() && n == n.upper() ) result += ' '; } else if ( c.category() != n.category() ) { result += ' '; } } input++; } return result.simplifyWhiteSpace(); } /*! A simple utility function for heuristicNameMatch(): it does some very minor character-skipping so that almost-exact matches score high. \a name is the text we're matching and \a hint is used for the comparison. */ int TQTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint) { // if they're the same, return a perfect score. if ( name && hint && *name && *hint && tqstricmp( name, hint ) == 0 ) return tqstrlen( hint ); // if the letters and numbers are the same, we have an "almost" // perfect match. TQString h( lettersAndNumbers( hint ) ); TQString n( lettersAndNumbers( name ) ); if ( h == n ) return tqstrlen( hint )-1; if ( h.stripWhiteSpace() == n.stripWhiteSpace() ) return tqstrlen( hint )-2; // could do some more here, but I don't think it's worth it return 0; } /*! Returns the TQTextCodec \a i positions from the most recently inserted codec, or 0 if there is no such TQTextCodec. Thus, codecForIndex(0) returns the most recently created TQTextCodec. */ TQTextCodec* TQTextCodec::codecForIndex(int i) { if (codecs_destroyed) { return nullptr; } setup(); return (uint)i >= all->count() ? 0 : *all->at(i); } /*! Returns the TQTextCodec which matches the \link TQTextCodec::mibEnum() MIBenum\endlink \a mib. 
*/ TQTextCodec* TQTextCodec::codecForMib(int mib) { if (codecs_destroyed) { return nullptr; } setup(); TQValueList::ConstIterator i; TQTextCodec* result=0; for ( i = all->begin(); i != all->end(); ++i ) { result = *i; if ( result->mibEnum()==mib ) return result; } #if !defined(TQT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT) if ( !result || (result && result->mibEnum() != mib) ) { TQTextCodec *codec = TQTextCodecFactory::createForMib(mib); if (codec) result = codec; } #endif // !TQT_NO_COMPONENT !QT_LITE_COMPONENT return result; } #ifdef Q_OS_WIN32 class TQWindowsLocalCodec: public TQTextCodec { public: TQWindowsLocalCodec(); ~TQWindowsLocalCodec(); TQString toUnicode(const char* chars, int len) const; TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; const char* name() const; int mibEnum() const; int heuristicContentMatch(const char* chars, int len) const; TQTextDecoder* makeDecoder() const; }; TQWindowsLocalCodec::TQWindowsLocalCodec() { } TQWindowsLocalCodec::~TQWindowsLocalCodec() { } TQString TQWindowsLocalCodec::toUnicode(const char* chars, int len) const { if ( len == 1 && chars ) { // Optimization; avoids allocation char c[2]; c[0] = *chars; c[1] = 0; return qt_winMB2TQString( c, 2 ); } if ( len < 0 ) return qt_winMB2TQString( chars ); TQCString s(chars,len+1); return qt_winMB2TQString(s); } TQCString TQWindowsLocalCodec::fromUnicode(const TQString& uc, int& lenInOut ) const { TQCString r = qt_winTQString2MB( uc, lenInOut ); lenInOut = r.length(); return r; } const char* TQWindowsLocalCodec::name() const { return "System"; } int TQWindowsLocalCodec::mibEnum() const { return 0; } int TQWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const { // ### Not a bad default implementation? 
TQString t = toUnicode(chars,len); int l = t.length(); TQCString mb = fromUnicode(t,l); int i=0; while ( i < len ) { if ( chars[i] == mb[i] ) i++; else break; } return i; } class TQWindowsLocalDecoder: public TQTextDecoder { const TQWindowsLocalCodec* codec; int nbuf; uchar buf[4]; // hopefully this will be enough public: TQWindowsLocalDecoder(const TQWindowsLocalCodec *c) : codec(c), nbuf(0) { } TQString toUnicode(const char* chars, int len) { if (len != 1 && nbuf == 0) return codec->toUnicode(chars, len); if (len == 1) { char c[sizeof buf + 2]; memcpy(c, buf, nbuf); c[nbuf] = *chars; c[nbuf+1] = 0; // try to decode this: TQString retval = codec->toUnicode(c, -1); if ( retval.isEmpty() ) { // it didn't return anything; we probably stopped mid-way in a multi-byte // character buf[nbuf++] = *chars; if (nbuf + 1 == sizeof buf) { tqWarning("TQWindowsLocalDecoder: exceeded max internal buffer size"); nbuf = 0; } } else nbuf = 0; // decoded successfully return retval; } if (len == -1) len = (int)strlen(chars); // Ugh! 
We need to allocate memory char *s = new char[nbuf + len + 1]; memcpy(s, buf, nbuf); memcpy(s + nbuf, chars, len); s[nbuf + len] = 0; TQString retval = codec->toUnicode(s, -1); nbuf = 0; delete[] s; return retval; } }; TQTextDecoder* TQWindowsLocalCodec::makeDecoder() const { return new TQWindowsLocalDecoder(this); } #else /* locale names mostly copied from XFree86 */ static const char * const iso8859_2locales[] = { "croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ", "czech", "hr", "hr_HR", "hu", "hu_HU", "hungarian", "pl", "pl_PL", "polish", "ro", "ro_RO", "rumanian", "serbocroatian", "sh", "sh_SP", "sh_YU", "sk", "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP", 0 }; static const char * const iso8859_3locales[] = { "eo", 0 }; static const char * const iso8859_4locales[] = { "ee", "ee_EE", 0 }; static const char * const iso8859_5locales[] = { "mk", "mk_MK", "sp", "sp_YU", 0 }; static const char * const cp_1251locales[] = { "be", "be_BY", "bg", "bg_BG", "bulgarian", 0 }; static const char * const pt_154locales[] = { "ba_RU", "ky", "ky_KG", "kk", "kk_KZ", 0 }; static const char * const iso8859_6locales[] = { "ar_AA", "ar_SA", "arabic", 0 }; static const char * const iso8859_7locales[] = { "el", "el_GR", "greek", 0 }; static const char * const iso8859_8locales[] = { "hebrew", "he", "he_IL", "iw", "iw_IL", 0 }; static const char * const iso8859_9locales[] = { "tr", "tr_TR", "turkish", 0 }; static const char * const iso8859_13locales[] = { "lt", "lt_LT", "lv", "lv_LV", 0 }; static const char * const iso8859_15locales[] = { "et", "et_EE", // Euro countries "br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE", "de_LU", "en_IE", "es", "es_ES", "eu_ES", "fi", "fi_FI", "finnish", "fr", "fr_FR", "fr_BE", "fr_LU", "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR", "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE", 0 }; static const char * const koi8_ulocales[] = { "uk", "uk_UA", "ru_UA", "ukrainian", 0 }; static const char * const tis_620locales[] = { "th", 
"th_TH", "thai", 0 }; static const char * const tcvnlocales[] = { "vi", "vi_VN", 0 }; static bool try_locale_list( const char * const locale[], const char * lang ) { int i; for( i=0; locale[i] && *locale[i] && strcmp(locale[i], lang); i++ ) ; return locale[i] != 0; } // For the probably_koi8_locales we have to look. the standard says // these are 8859-5, but almost all Russian users use KOI8-R and // incorrectly set $LANG to ru_RU. We'll check tolower() to see what // tolower() thinks ru_RU means. // If you read the history, it seems that many Russians blame ISO and // Perestroika for the confusion. // // The real bug is that some programs break if the user specifies // ru_RU.KOI8-R. static const char * const probably_koi8_rlocales[] = { "ru", "ru_SU", "ru_RU", "russian", 0 }; static TQTextCodec * ru_RU_hack( const char * i ) { TQTextCodec * ru_RU_codec = 0; TQCString origlocale = setlocale( LC_CTYPE, i ); // unicode koi8r latin5 name // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU int latin5 = tolower( 0xCE ); int koi8r = tolower( 0xE0 ); if ( koi8r == 0xC0 && latin5 != 0xEE ) { ru_RU_codec = TQTextCodec::codecForName( "KOI8-R" ); } else if ( koi8r != 0xC0 && latin5 == 0xEE ) { ru_RU_codec = TQTextCodec::codecForName( "ISO 8859-5" ); } else { // something else again... let's assume... *throws dice* ru_RU_codec = TQTextCodec::codecForName( "KOI8-R" ); tqWarning( "TQTextCodec: using KOI8-R, probe failed (%02x %02x %s)", koi8r, latin5, i ); } setlocale( LC_CTYPE, origlocale.data() ); return ru_RU_codec; } #endif /*! Set the codec to \a c; this will be returned by codecForLocale(). This might be needed for some applications that want to use their own mechanism for setting the locale. \sa codecForLocale() */ void TQTextCodec::setCodecForLocale(TQTextCodec *c) { localeMapper = c; } /*! Returns a pointer to the codec most suitable for this locale. 
*/ TQTextCodec* TQTextCodec::codecForLocale() { if (codecs_destroyed) { return nullptr; } if (!localeMapper) { setup(); } return localeMapper; } /*! Searches all installed TQTextCodec objects and returns the one which best matches \a name; the match is case-insensitive. Returns 0 if no codec's heuristicNameMatch() reports a match better than \a accuracy, or if \a name is a null string. \sa heuristicNameMatch() */ TQTextCodec* TQTextCodec::codecForName( const char* name, int accuracy ) { if (codecs_destroyed) { return nullptr; } if ( !name || !*name ) return 0; setup(); TQValueList::ConstIterator i; TQTextCodec* result = 0; int best = accuracy; TQTextCodec* cursor; for ( i = all->begin(); i != all->end(); ++i ) { cursor = *i; int s = cursor->heuristicNameMatch( name ); if ( s > best ) { best = s; result = cursor; } } #if !defined(TQT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT) if ( !result ) result = TQTextCodecFactory::createForName(name); #endif // !TQT_NO_COMPONENT !QT_LITE_COMPONENT return result; } /*! Searches all installed TQTextCodec objects, returning the one which most recognizes the given content. May return 0. Note that this is often a poor choice, since character encodings often use most of the available character sequences, and so only by linguistic analysis could a true match be made. \a chars contains the string to check, and \a len contains the number of characters in the string to use. \sa heuristicContentMatch() */ TQTextCodec* TQTextCodec::codecForContent(const char* chars, int len) { if (codecs_destroyed) { return nullptr; } setup(); TQValueList::ConstIterator i; TQTextCodec* result = 0; int best=0; TQTextCodec* cursor; for ( i = all->begin(); i != all->end(); ++i ) { cursor = *i; int s = cursor->heuristicContentMatch(chars,len); if ( s > best ) { best = s; result = cursor; } } return result; } /*! \fn const char* TQTextCodec::name() const TQTextCodec subclasses must reimplement this function. 
It returns the name of the encoding supported by the subclass. When
    choosing a name for an encoding, consider these points:
    \list
    \i On X11, heuristicNameMatch( const char * hint )
       is used to test if the TQTextCodec
       can convert between Unicode and the encoding of a font
       with encoding \e hint, such as "iso8859-1" for Latin-1 fonts,
       "koi8-r" for Russian KOI8 fonts.
       The default algorithm of heuristicNameMatch() uses name().
    \i Some applications may use this function to present
       encodings to the end user.
    \endlist
*/

/*!
    \fn int TQTextCodec::mibEnum() const

    Subclasses of TQTextCodec must reimplement this function. It
    returns the MIBenum (see \link
    http://www.iana.org/assignments/character-sets the
    IANA character-sets encoding file\endlink for more information).
    It is important that each TQTextCodec subclass returns the correct
    unique value for this function.
*/

/*!
    Returns the preferred mime name of the encoding as defined in the
    \link http://www.iana.org/assignments/character-sets
    IANA character-sets encoding file\endlink.

    The implementation here returns name().
*/
const char* TQTextCodec::mimeName() const
{
    return name();
}

/*!
    \fn int TQTextCodec::heuristicContentMatch(const char* chars, int len) const

    TQTextCodec subclasses must reimplement this function. It examines
    the first \a len bytes of \a chars and returns a value indicating
    how likely it is that the string is a prefix of text encoded in
    the encoding of the subclass. A negative return value indicates
    that the text is detectably not in the encoding (e.g. it contains
    characters undefined in the encoding). A return value of 0
    indicates that the text should be decoded with this codec rather
    than as ASCII, but there is no particular evidence. The value
    should range up to \a len. Thus, most decoders will return -1, 0,
    or -\a len.

    The characters are not null terminated.

    \sa codecForContent().
*/

/*!
    Creates a TQTextDecoder which stores enough state to decode chunks
    of char* data to create chunks of Unicode data.
The default implementation creates a stateless decoder, which is only sufficient for the simplest encodings where each byte corresponds to exactly one Unicode character. The caller is responsible for deleting the returned object. */ TQTextDecoder* TQTextCodec::makeDecoder() const { return new TQTextStatelessDecoder(this); } /*! Creates a TQTextEncoder which stores enough state to encode chunks of Unicode data as char* data. The default implementation creates a stateless encoder, which is only sufficient for the simplest encodings where each Unicode character corresponds to exactly one character. The caller is responsible for deleting the returned object. */ TQTextEncoder* TQTextCodec::makeEncoder() const { return new TQTextStatelessEncoder(this); } /*! TQTextCodec subclasses must reimplement this function or makeDecoder(). It converts the first \a len characters of \a chars to Unicode. The default implementation makes a decoder with makeDecoder() and converts the input with that. Note that the default makeDecoder() implementation makes a decoder that simply calls this function, hence subclasses \e must reimplement one function or the other to avoid infinite recursion. */ TQString TQTextCodec::toUnicode(const char* chars, int len) const { if ( chars == 0 ) return TQString::null; TQTextDecoder* i = makeDecoder(); TQString result = i->toUnicode(chars,len); delete i; return result; } /*! TQTextCodec subclasses must reimplement either this function or makeEncoder(). It converts the first \a lenInOut characters of \a uc from Unicode to the encoding of the subclass. If \a lenInOut is negative or too large, the length of \a uc is used instead. Converts \a lenInOut characters (not bytes) from \a uc, producing a TQCString. \a lenInOut will be set to the \link TQCString::length() length\endlink of the result (in bytes). The default implementation makes an encoder with makeEncoder() and converts the input with that. 
Note that the default makeEncoder() implementation makes an encoder that simply calls this function, hence subclasses \e must reimplement one function or the other to avoid infinite recursion. */ TQCString TQTextCodec::fromUnicode(const TQString& uc, int& lenInOut) const { TQTextEncoder* i = makeEncoder(); TQCString result = i->fromUnicode(uc, lenInOut); delete i; return result; } /*! \overload \internal */ TQByteArray TQTextCodec::fromUnicode( const TQString &str, int pos, int len ) const { TQByteArray a; if( len < 0 ) len = str.length() - pos; a = fromUnicode( str.mid(pos, len) ); if( a.size() > 0 && a[(int)a.size() - 1] == '\0' ) a.resize( a.size() - 1 ); return a; } /*! \overload \a uc is the unicode source string. */ TQCString TQTextCodec::fromUnicode(const TQString& uc) const { int l = uc.length(); return fromUnicode(uc,l); } /*! \overload \a a contains the source characters; \a len contains the number of characters in \a a to use. */ TQString TQTextCodec::toUnicode(const TQByteArray& a, int len) const { int l = a.size(); l = TQMIN( l, len ); return toUnicode( a.data(), l ); } /*! \overload \a a contains the source characters. */ TQString TQTextCodec::toUnicode(const TQByteArray& a) const { int l = a.size(); return toUnicode( a.data(), l ); } /*! \overload \a a contains the source characters; \a len contains the number of characters in \a a to use. */ TQString TQTextCodec::toUnicode(const TQCString& a, int len) const { int l = a.length(); l = TQMIN( l, len ); return toUnicode( a.data(), l ); } /*! \overload \a a contains the source characters. */ TQString TQTextCodec::toUnicode(const TQCString& a) const { int l = a.length(); return toUnicode( a.data(), l ); } /*! \overload \a chars contains the source characters. */ TQString TQTextCodec::toUnicode(const char* chars) const { return toUnicode(chars,tqstrlen(chars)); } /*! 
\internal */ unsigned short TQTextCodec::characterFromUnicode(const TQString &str, int pos) const { TQCString result = TQTextCodec::fromUnicode(TQString(str[pos])); uchar *ch = (uchar *) result.data(); ushort retval = 0; if (result.size() > 2) { retval = (ushort) *ch << 8; ch++; } return retval + *ch; } /*! Returns TRUE if the Unicode character \a ch can be fully encoded with this codec; otherwise returns FALSE. The default implementation tests if the result of toUnicode(fromUnicode(ch)) is the original \a ch. Subclasses may be able to improve the efficiency. */ bool TQTextCodec::canEncode( TQChar ch ) const { return toUnicode(fromUnicode(ch)) == ch; } /*! \overload \a s contains the string being tested for encode-ability. */ bool TQTextCodec::canEncode( const TQString& s ) const { if ( s.isEmpty() ) return TRUE; return toUnicode(fromUnicode(s)) == s; } /*! \class TQTextEncoder tqtextcodec.h \brief The TQTextEncoder class provides a state-based encoder. \reentrant \ingroup i18n The encoder converts Unicode into another format, remembering any state that is required between calls. \sa TQTextCodec::makeEncoder() */ /*! Destroys the encoder. */ TQTextEncoder::~TQTextEncoder() { } /*! \fn TQCString TQTextEncoder::fromUnicode(const TQString& uc, int& lenInOut) Converts \a lenInOut characters (not bytes) from \a uc, producing a TQCString. \a lenInOut will be set to the \link TQCString::length() length\endlink of the result (in bytes). The encoder is free to record state to use when subsequent calls are made to this function (for example, it might change modes with escape sequences if needed during the encoding of one string, then assume that mode applies when a subsequent call begins). */ /*! \class TQTextDecoder tqtextcodec.h \brief The TQTextDecoder class provides a state-based decoder. \reentrant \ingroup i18n The decoder converts a text format into Unicode, remembering any state that is required between calls. \sa TQTextCodec::makeEncoder() */ /*! 
Destroys the decoder. */ TQTextDecoder::~TQTextDecoder() { } /*! \fn TQString TQTextDecoder::toUnicode(const char* chars, int len) Converts the first \a len bytes in \a chars to Unicode, returning the result. If not all characters are used (e.g. if only part of a multi-byte encoding is at the end of the characters), the decoder remembers enough state to continue with the next call to this function. */ #define CHAINED 0xffff struct TQMultiByteUnicodeTable { // If multiByte, ignore unicode and index into multiByte // with the next character. TQMultiByteUnicodeTable() : unicode(0xfffd), multiByte(0) { } ~TQMultiByteUnicodeTable() { if ( multiByte ) delete [] multiByte; } ushort unicode; TQMultiByteUnicodeTable* multiByte; }; static int getByte(char* &cursor) { int byte = 0; if ( *cursor ) { if ( cursor[1] == 'x' ) byte = strtol(cursor+2,&cursor,16); else if ( cursor[1] == 'd' ) byte = strtol(cursor+2,&cursor,10); else byte = strtol(cursor+2,&cursor,8); } return byte&0xff; } class TQTextCodecFromIOD; class TQTextCodecFromIODDecoder : public TQTextDecoder { const TQTextCodecFromIOD* codec; TQMultiByteUnicodeTable* mb; public: TQTextCodecFromIODDecoder(const TQTextCodecFromIOD* c); TQString toUnicode(const char* chars, int len); }; class TQTextCodecFromIOD : public TQTextCodec { friend class TQTextCodecFromIODDecoder; TQCString n; // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte, // use from_unicode_page_multiByte[row()][cell()] as string. 
char** from_unicode_page; char*** from_unicode_page_multiByte; char unkn; // Only one of these is used ushort* to_unicode; TQMultiByteUnicodeTable* to_unicode_multiByte; int max_bytes_per_char; TQStrList aliases; bool stateless() const { return !to_unicode_multiByte; } public: TQTextCodecFromIOD(TQIODevice* iod) { from_unicode_page = 0; to_unicode_multiByte = 0; to_unicode = 0; from_unicode_page_multiByte = 0; max_bytes_per_char = 1; const int maxlen=100; char line[maxlen]; char esc='\\'; char comm='%'; bool incmap = FALSE; while (iod->readLine(line,maxlen) > 0) { if (0==tqstrnicmp(line,"",15)) n = line+15; else if (0==tqstrnicmp(line," ",14)) esc = line[14]; else if (0==tqstrnicmp(line," ",15)) comm = line[15]; else if (line[0]==comm && 0==tqstrnicmp(line+1," alias ",7)) { aliases.append(line+8); } else if (0==tqstrnicmp(line,"CHARMAP",7)) { if (!from_unicode_page) { from_unicode_page = new char*[256]; for (int i=0; i<256; i++) from_unicode_page[i]=0; } if (!to_unicode) { to_unicode = new ushort[256]; } incmap = TRUE; } else if (0==tqstrnicmp(line,"END CHARMAP",11)) break; else if (incmap) { char* cursor = line; int byte=-1,unicode=-1; ushort* mb_unicode=0; const int maxmb=8; // more -> we'll need to improve datastructures char mb[maxmb+1]; int nmb=0; while (*cursor) { if (cursor[0]=='<' && cursor[1]=='U' && cursor[2]>='0' && cursor[2]<='9' && cursor[3]>='0' && cursor[3]<='9') { unicode = strtol(cursor+2,&cursor,16); } else if (*cursor==esc) { byte = getByte(cursor); if ( *cursor == esc ) { if ( !to_unicode_multiByte ) { to_unicode_multiByte = new TQMultiByteUnicodeTable[256]; for (int i=0; i<256; i++) { to_unicode_multiByte[i].unicode = to_unicode[i]; to_unicode_multiByte[i].multiByte = 0; } delete [] to_unicode; to_unicode = 0; } TQMultiByteUnicodeTable* mbut = to_unicode_multiByte+byte; mb[nmb++] = byte; while ( nmb < maxmb && *cursor == esc ) { // Always at least once mbut->unicode = CHAINED; byte = getByte(cursor); mb[nmb++] = byte; if (!mbut->multiByte) { 
mbut->multiByte = new TQMultiByteUnicodeTable[256]; } mbut = mbut->multiByte+byte; mb_unicode = & mbut->unicode; } if ( nmb > max_bytes_per_char ) max_bytes_per_char = nmb; } } else { cursor++; } } if (unicode >= 0 && unicode <= 0xffff) { TQChar ch((ushort)unicode); if (!from_unicode_page[ch.row()]) { from_unicode_page[ch.row()] = new char[256]; for (int i=0; i<256; i++) from_unicode_page[ch.row()][i]=0; } if ( mb_unicode ) { from_unicode_page[ch.row()][ch.cell()] = 0; if (!from_unicode_page_multiByte) { from_unicode_page_multiByte = new char**[256]; for (int i=0; i<256; i++) from_unicode_page_multiByte[i]=0; } if (!from_unicode_page_multiByte[ch.row()]) { from_unicode_page_multiByte[ch.row()] = new char*[256]; for (int i=0; i<256; i++) from_unicode_page_multiByte[ch.row()][i] = 0; } mb[nmb++] = 0; from_unicode_page_multiByte[ch.row()][ch.cell()] = tqstrdup(mb); *mb_unicode = unicode; } else { from_unicode_page[ch.row()][ch.cell()] = (char)byte; if ( to_unicode ) to_unicode[byte] = unicode; else to_unicode_multiByte[byte].unicode = unicode; } } else { } } } n = n.stripWhiteSpace(); unkn = '?'; // ##### Might be a bad choice. } ~TQTextCodecFromIOD() { if ( from_unicode_page ) { for (int i=0; i<256; i++) if (from_unicode_page[i]) delete [] from_unicode_page[i]; } if ( from_unicode_page_multiByte ) { for (int i=0; i<256; i++) if (from_unicode_page_multiByte[i]) for (int j=0; j<256; j++) if (from_unicode_page_multiByte[i][j]) delete [] from_unicode_page_multiByte[i][j]; } if ( to_unicode ) delete [] to_unicode; if ( to_unicode_multiByte ) delete [] to_unicode_multiByte; } bool ok() const { return !!from_unicode_page; } TQTextDecoder* makeDecoder() const { if ( stateless() ) return TQTextCodec::makeDecoder(); else return new TQTextCodecFromIODDecoder(this); } const char* name() const { return n; } int mibEnum() const { return 0; // #### Unknown. 
} int heuristicContentMatch(const char*, int) const { return 0; } int heuristicNameMatch(const char* hint) const { int bestr = TQTextCodec::heuristicNameMatch(hint); TQStrListIterator it(aliases); char* a; while ((a=it.current())) { ++it; int r = simpleHeuristicNameMatch(a,hint); if (r > bestr) bestr = r; } return bestr; } TQString toUnicode(const char* chars, int len) const { const uchar* uchars = (const uchar*)chars; TQString result; TQMultiByteUnicodeTable* multiByte=to_unicode_multiByte; if ( multiByte ) { while (len--) { TQMultiByteUnicodeTable& mb = multiByte[*uchars]; if ( mb.multiByte ) { // Chained multi-byte multiByte = mb.multiByte; } else { result += TQChar(mb.unicode); multiByte=to_unicode_multiByte; } uchars++; } } else { while (len--) result += TQChar(to_unicode[*uchars++]); } return result; } #if !defined(Q_NO_USING_KEYWORD) using TQTextCodec::fromUnicode; #endif TQCString fromUnicode(const TQString& uc, int& lenInOut) const { if (lenInOut > (int)uc.length()) lenInOut = uc.length(); int rlen = lenInOut*max_bytes_per_char; TQCString rstr(rlen+1); char* cursor = rstr.data(); char* s=0; int l = lenInOut; int lout = 0; for (int i=0; ito_unicode_multiByte; } TQString TQTextCodecFromIODDecoder::toUnicode(const char* chars, int len) { const uchar* uchars = (const uchar*)chars; TQString result; while (len--) { TQMultiByteUnicodeTable& t = mb[*uchars]; if ( t.multiByte ) { // Chained multi-byte mb = t.multiByte; } else { if ( t.unicode ) result += TQChar(t.unicode); mb=codec->to_unicode_multiByte; } uchars++; } return result; } #ifndef TQT_NO_CODECS // Cannot use
 or \code
/*!
    Reads a POSIX2 charmap definition from \a iod.
    The parser recognizes the following lines:


  <code_set_name> name
  <escape_char> character
  % alias alias
  CHARMAP
  <token> /xhexbyte <Uunicode> ...
  <token> /ddecbyte <Uunicode> ...
  <token> /octbyte <Uunicode> ...
  <token> /any/any... <Uunicode> ...
  END CHARMAP
The resulting TQTextCodec is returned (and also added to the global list of codecs). The name() of the result is taken from the code_set_name. Note that a codec constructed in this way uses much more memory and is slower than a hand-written TQTextCodec subclass, since tables in code are kept in memory shared by all TQt applications. \sa loadCharmapFile() */ TQTextCodec* TQTextCodec::loadCharmap(TQIODevice* iod) { TQTextCodecFromIOD* r = new TQTextCodecFromIOD(iod); if ( !r->ok() ) { delete r; r = 0; } return r; } /*! A convenience function for loadCharmap() that loads the charmap definition from the file \a filename. */ TQTextCodec* TQTextCodec::loadCharmapFile(TQString filename) { TQFile f(filename); if (f.open(IO_ReadOnly)) { TQTextCodecFromIOD* r = new TQTextCodecFromIOD(&f); if ( !r->ok() ) delete r; else return r; } return 0; } #endif //TQT_NO_CODECS /*! Returns a string representing the current language and sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil. */ const char* TQTextCodec::locale() { return TQLocalePrivate::systemLocaleName(); } #ifndef TQT_NO_CODECS class TQSimpleTextCodec: public TQTextCodec { public: TQSimpleTextCodec( int ); ~TQSimpleTextCodec(); TQString toUnicode(const char* chars, int len) const; #if !defined(Q_NO_USING_KEYWORD) using TQTextCodec::fromUnicode; #endif TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; unsigned short characterFromUnicode(const TQString &str, int pos) const; const char* name() const; const char* mimeName() const; int mibEnum() const; int heuristicContentMatch(const char* chars, int len) const; int heuristicNameMatch(const char* hint) const; #if !defined(Q_NO_USING_KEYWORD) using TQTextCodec::canEncode; #endif bool canEncode( TQChar ch ) const; void fromUnicode( const TQChar *in, unsigned short *out, int length ) const; private: void buildReverseMap(); int forwardIndex; TQMemArray *reverseMap; }; #define LAST_MIB 2004 static const struct { const char *mime; const char * cs; 
int mib; TQ_UINT16 values[128]; } unicodevalues[] = { // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt { "KOI8-R", "KOI8-R", 2084, { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248, 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E, 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9, 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, // /**/ - The BULLET OPERATOR is confused. Some people think // it should be 0x2022 (BULLET). 
// from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt { "KOI8-U", "KOI8-U", 2088, { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E, 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9, 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } }, // next bits generated from tables on the Unicode 2.0 CD. we can // use these tables since this is part of the transition to using // unicode everywhere in qt. // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done // then I inserted the files manually. 
{ "ISO-8859-2", "ISO 8859-2", 5, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B, 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C, 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} }, { "ISO-8859-3", "ISO 8859-3", 6, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7, 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B, 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7, 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C, 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7, 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 
0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} }, { "ISO-8859-4", "ISO 8859-4", 7, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF, 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7, 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B, 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A, 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF, 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B, 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} }, { "ISO-8859-5", "ISO 8859-5", 8, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F, 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} }, { "ISO-8859-6", "ISO 8859-6", 82, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F, 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F, 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, { "ISO-8859-7", "ISO 8859-7", 10, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, 
0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} }, { "ISO-8859-8-I", "ISO 8859-8-I", 85, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, { "ISO-8859-9", "ISO 8859-9", 12, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 
0x00BF, 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} }, { "ISO-8859-10", "ISO 8859-10", 13, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A, 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7, 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B, 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168, 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF, 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} }, { "ISO-8859-13", "ISO 8859-13", 109, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 
0x00AE, 0x00C6, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} }, { "ISO-8859-14", "ISO 8859-14", 110, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} }, { "ISO-8859-16", "ISO 8859-16", 112, { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 
0x009D, 0x009E, 0x009F, 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B, 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C, 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF} }, // next bits generated again from tables on the Unicode 3.0 CD. // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done { "CP 850", "IBM 850", 2009, { 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192, 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0, 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510, 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3, 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4, 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE, 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580, 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE, 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 
0x00DD, 0x00AF, 0x00B4, 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8, 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0} }, { "CP 874", "CP 874", 0, //### what is the mib? { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} }, { "IBM 866", "IBM 866", 2086, { 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, 0x256A, 0x2518, 0x250C, 0x2588, 
0x2584, 0x258C, 0x2590, 0x2580, 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E, 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0} }, { "windows-1250", "CP 1250", 2250, { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179, 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A, 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B, 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C, 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} }, { "windows-1251", "CP 1251", 2251, { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F, 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407, 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457, 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 
0x041C, 0x041D, 0x041E, 0x041F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} }, { "windows-1252", "CP 1252", 2252, { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, { "windows-1253", "CP 1253", 2253, { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 
0x038C, 0x00BD, 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F, 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} }, { "windows-1254", "CP 1254", 2254, { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} }, { "windows-1255", "CP 1255", 2255, { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB, 
0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF, 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} }, { "windows-1256", "CP 1256", 2256, { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688, 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA, 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F, 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7, 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643, 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF, 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7, 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} }, { "windows-1257", "CP 1257", 2257, { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021, 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8, 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0x2122, 0xFFFD, 0x203A, 
0xFFFD, 0x00AF, 0x02DB, 0xFFFD, 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6, 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B, 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF, 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C, 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} }, { "windows-1258", "CP 1258", 2258, { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF, 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF, 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} }, { "Apple Roman", "Apple Roman", 0, { 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 0x00E0, 0x00E2, 0x00E4, 0x00E3, 
0x00E5, 0x00E7, 0x00E9, 0x00E8, 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7} }, // This one is based on the charmap file // /usr/share/i18n/charmaps/SAMI-WS2.gz, which is manually adapted // to this format by Børre Gaup { "WINSAMI2", "WS2", 0, { 0x20AC, 0xFFFD, 0x010C, 0x0192, 0x010D, 0x01B7, 0x0292, 0x01EE, 0x01EF, 0x0110, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x0111, 0x01E6, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178, 0x00A0, 0x01E7, 0x01E4, 0x00A3, 0x00A4, 0x01E5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x021E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x021F, 0x00B0, 0x00B1, 0x01E8, 0x01E9, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x014A, 0x014B, 0x0166, 0x00BB, 0x0167, 0x00BD, 0x017D, 0x017E, 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 
0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} }, // this one is generated from the charmap file located in /usr/share/i18n/charmaps // on most Linux distributions. The thai character set tis620 is byte by byte equivalent // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too. // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's///' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620 { "TIS-620", "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent) { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F, 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F, 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F, 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F, 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD } }, /* Name: hp-roman8 [HP-PCL5,RFC1345,KXS2] MIBenum: 2004 Source: LaserJet IIP Printer User's 
Manual, HP part no 33471-90901, Hewlet-Packard, June 1989. Alias: roman8 Alias: r8 Alias: csHPRoman8 */ { "Roman8", "HP-Roman8", 2004, { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF, 0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4, 0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1, 0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2, 0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA, 0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC, 0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6, 0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4, 0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3, 0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF, 0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC, 0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD } } // if you add more chacater sets at the end, change LAST_MIB above }; TQSimpleTextCodec::TQSimpleTextCodec( int i ) : TQTextCodec(), forwardIndex( i ) { reverseMap = 0; } TQSimpleTextCodec::~TQSimpleTextCodec() { delete reverseMap; } void TQSimpleTextCodec::buildReverseMap() { if ( !reverseMap ) { TQMemArray **map = &((TQSimpleTextCodec *)this)->reverseMap; int m = 0; int i = 0; while( i < 128 ) { if ( unicodevalues[forwardIndex].values[i] > m && unicodevalues[forwardIndex].values[i] < 0xfffd ) m = unicodevalues[forwardIndex].values[i]; i++; } m++; *map = new TQMemArray( m ); for( i = 0; i < 128 && i < m; i++ ) (**map)[i] = (char)i; for( ;i < m; i++ ) (**map)[i] = 0; for( i=128; i<256; i++ ) { int u = unicodevalues[forwardIndex].values[i-128]; if ( u < m ) (**map)[u] = (char)(unsigned char)(i); } } } TQString 
TQSimpleTextCodec::toUnicode(const char* chars, int len) const { if ( len <= 0 || chars == 0 ) return TQString::null; const unsigned char * c = (const unsigned char *)chars; int i; for ( i = 0; i < len; i++ ) if ( c[i] == '\0' ) { len = i; break; } TQString r; r.setUnicode(0, len); TQChar* uc = (TQChar*)r.unicode(); // const_cast for ( i = 0; i < len; i++ ) { if ( c[i] > 127 ) uc[i] = unicodevalues[forwardIndex].values[c[i]-128]; else uc[i] = c[i]; } return r; } TQCString TQSimpleTextCodec::fromUnicode(const TQString& uc, int& len ) const { if ( !reverseMap ) ((TQSimpleTextCodec *)this)->buildReverseMap(); if ( len <0 || len > (int)uc.length() ) len = uc.length(); TQCString r( len+1 ); int i = len; int u; const TQChar* ucp = uc.unicode(); unsigned char* rp = (unsigned char *)r.data(); unsigned char* rmp = reverseMap->data(); int rmsize = (int) reverseMap->size(); while( i-- ) { u = ucp->unicode(); *rp = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' ); if ( *rp == 0 ) *rp = '?'; rp++; ucp++; } r[len] = 0; return r; } void TQSimpleTextCodec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const { if ( !reverseMap ) ((TQSimpleTextCodec *)this)->buildReverseMap(); unsigned char* rmp = reverseMap->data(); int rmsize = (int) reverseMap->size(); while ( length-- ) { unsigned short u = in->unicode(); *out = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 ); ++in; ++out; } } unsigned short TQSimpleTextCodec::characterFromUnicode(const TQString &str, int pos) const { if ( !reverseMap ) ((TQSimpleTextCodec *)this)->buildReverseMap(); unsigned short u = str[pos].unicode(); unsigned char* rmp = reverseMap->data(); int rmsize = (int) reverseMap->size(); return u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 ); } bool TQSimpleTextCodec::canEncode( TQChar ch ) const { if ( !reverseMap ) ((TQSimpleTextCodec *)this)->buildReverseMap(); unsigned short u = ch.unicode(); unsigned char* rmp = reverseMap->data(); int rmsize = (int) reverseMap->size(); return u < 128 ? 
TRUE : (( u < rmsize ) ? (*(rmp+u) != 0) : FALSE ); } const char* TQSimpleTextCodec::name() const { return unicodevalues[forwardIndex].cs; } const char* TQSimpleTextCodec::mimeName() const { return unicodevalues[forwardIndex].mime; } int TQSimpleTextCodec::mibEnum() const { return unicodevalues[forwardIndex].mib; } int TQSimpleTextCodec::heuristicNameMatch(const char* hint) const { if ( tqstricmp( hint, mimeName() ) == 0 ) return 10000; // return a large value if ( hint[0]=='k' ) { TQCString lhint = TQCString(hint).lower(); // Help people with messy fonts if ( lhint == "koi8-1" ) return TQTextCodec::heuristicNameMatch("koi8-r")-1; if ( lhint == "koi8-ru" ) return TQTextCodec::heuristicNameMatch("koi8-r")-1; } else if ( hint[0] == 't' && mibEnum() == 2259 /* iso8859-11 */ ) { // 8859-11 and tis620 are byte by byte equivalent int i = simpleHeuristicNameMatch("tis620-0", hint); if( !i ) i = simpleHeuristicNameMatch("tis-620", hint); if( i ) return i; } else if ( mibEnum() == 82 /* ISO 8859-6 */ ) { int i = simpleHeuristicNameMatch("ISO 8859-6-I", hint); if ( i ) return i; } return TQTextCodec::heuristicNameMatch(hint); } int TQSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const { if ( len<1 || !chars ) return -1; int i = 0; const uchar * c = (const unsigned char *)chars; int r = 0; while( i= 128 ) { if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd ) return -1; } if ( (*c >= ' ' && *c < 127) || *c == '\n' || *c == '\t' || *c == '\r' ) r++; i++; c++; } if ( mibEnum()==4 ) r+=1; return r; } #endif class TQLatin1Codec : public TQTextCodec { public: #if !defined(Q_NO_USING_KEYWORD) using TQTextCodec::fromUnicode; using TQTextCodec::toUnicode; #endif TQString toUnicode(const char* chars, int len) const; TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; void fromUnicode( const TQChar *in, unsigned short *out, int length ) const; unsigned short characterFromUnicode(const TQString &str, int pos) const; const char* name() const; 
const char* mimeName() const; int mibEnum() const; int heuristicContentMatch(const char* chars, int len) const; private: int forwardIndex; }; TQString TQLatin1Codec::toUnicode(const char* chars, int len) const { if ( chars == 0 ) return TQString::null; return TQString::fromLatin1(chars, len); } TQCString TQLatin1Codec::fromUnicode(const TQString& uc, int& len ) const { if ( len <0 || len > (int)uc.length() ) len = uc.length(); TQCString r( len+1 ); char *d = r.data(); int i = 0; const TQChar *ch = uc.unicode(); while ( i < len ) { d[i] = ch->row() ? '?' : ch->cell(); i++; ch++; } r[len] = 0; return r; } void TQLatin1Codec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const { while ( length-- ) { *out = in->row() ? 0 : in->cell(); ++in; ++out; } } unsigned short TQLatin1Codec::characterFromUnicode(const TQString &str, int pos) const { const TQChar *ch = str.unicode() + pos; if (ch->row()) return 0; return (unsigned short) ch->cell(); } const char* TQLatin1Codec::name() const { return "ISO 8859-1"; } const char* TQLatin1Codec::mimeName() const { return "ISO-8859-1"; } int TQLatin1Codec::mibEnum() const { return 4; } int TQLatin1Codec::heuristicContentMatch(const char* chars, int len) const { if ( len<1 || !chars ) return -1; int i = 0; const uchar * c = (const unsigned char *)chars; int r = 0; while( i= 0x80 && *c < 0xa0 ) return -1; if ( (*c >= ' ' && *c < 127) || *c == '\n' || *c == '\t' || *c == '\r' ) r++; i++; c++; } if ( this == (const TQTextCodec *)codecForLocale() ) r += 5; return r; } class TQLatin15Codec: public TQLatin1Codec { public: TQString toUnicode(const char* chars, int len) const; #if !defined(Q_NO_USING_KEYWORD) using TQLatin1Codec::fromUnicode; #endif TQCString fromUnicode(const TQString& uc, int& lenInOut ) const; void fromUnicode( const TQChar *in, unsigned short *out, int length ) const; unsigned short characterFromUnicode(const TQString &str, int pos) const; const char* name() const; const char* mimeName() const; int 
mibEnum() const; private: int forwardIndex; }; TQString TQLatin15Codec::toUnicode(const char* chars, int len) const { if ( chars == 0 ) return TQString::null; TQString str = TQString::fromLatin1(chars, len); TQChar *uc = (TQChar *)str.unicode(); while( len-- ) { switch( uc->unicode() ) { case 0xa4: *uc = 0x20ac; break; case 0xa6: *uc = 0x0160; break; case 0xa8: *uc = 0x0161; break; case 0xb4: *uc = 0x017d; break; case 0xb8: *uc = 0x017e; break; case 0xbc: *uc = 0x0152; break; case 0xbd: *uc = 0x0153; break; case 0xbe: *uc = 0x0178; break; default: break; } uc++; } return str; } static inline unsigned char latin15CharFromUnicode( unsigned short uc, bool replacement = TRUE ) { uchar c; if ( uc < 0x0100 ) { if ( uc > 0xa3 && uc < 0xbf ) { switch( uc ) { case 0xa4: case 0xa6: case 0xa8: case 0xb4: case 0xb8: case 0xbc: case 0xbd: case 0xbe: c = replacement ? '?' : 0; break; default: c = (unsigned char) uc; break; } } else { c = (unsigned char) uc; } } else { if ( uc == 0x20ac ) c = 0xa4; else if ( (uc & 0xff00) == 0x0100 ) { switch( uc ) { case 0x0160: c = 0xa6; break; case 0x0161: c = 0xa8; break; case 0x017d: c = 0xb4; break; case 0x017e: c = 0xb8; break; case 0x0152: c = 0xbc; break; case 0x0153: c = 0xbd; break; case 0x0178: c = 0xbe; break; default: c = replacement ? '?' : 0; } } else { c = replacement ? '?' 
: 0; } } return c; } void TQLatin15Codec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const { while ( length-- ) { *out = latin15CharFromUnicode( in->unicode(), FALSE ); ++in; ++out; } } TQCString TQLatin15Codec::fromUnicode(const TQString& uc, int& len ) const { if ( len <0 || len > (int)uc.length() ) len = uc.length(); TQCString r( len+1 ); char *d = r.data(); int i = 0; const TQChar *ch = uc.unicode(); while ( i < len ) { d[i] = latin15CharFromUnicode( ch->unicode() ); i++; ch++; } r[len] = 0; return r; } unsigned short TQLatin15Codec::characterFromUnicode(const TQString &str, int pos) const { return latin15CharFromUnicode( str.unicode()[pos].unicode(), FALSE ); } const char* TQLatin15Codec::name() const { return "ISO 8859-15"; } const char* TQLatin15Codec::mimeName() const { return "ISO-8859-15"; } int TQLatin15Codec::mibEnum() const { return 111; } static TQTextCodec *checkForCodec(const char *name) { TQTextCodec *c = TQTextCodec::codecForName(name); if (!c) { const char *at = strchr(name, '@'); if (at) { TQCString n(name, at - name + 1); c = TQTextCodec::codecForName(n.data()); } } return c; } /* the next function is implicitely thread safe, as they are only called by setup() which uses a mutex. */ static void setupLocaleMapper() { #ifdef Q_OS_WIN32 localeMapper = TQTextCodec::codecForName( "System" ); #else #if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF) && !defined(Q_OS_MAC) char *charset = nl_langinfo (CODESET); if ( charset ) localeMapper = TQTextCodec::codecForName( charset ); #endif if ( !localeMapper ) { // Very poorly defined and followed standards causes lots of code // to try to get all the cases... // Try to determine locale codeset from locale name assigned to // LC_CTYPE category. // First part is getting that locale name. First try setlocale() which // definitely knows it, but since we cannot fully trust it, get ready // to fall back to environment variables. 
char * ctype = tqstrdup( setlocale( LC_CTYPE, 0 ) ); // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG // environment variables. char * lang = tqstrdup( getenv("LC_ALL") ); if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) { if ( lang ) delete [] lang; lang = tqstrdup( getenv("LC_CTYPE") ); } if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) { if ( lang ) delete [] lang; lang = tqstrdup( getenv("LANG") ); } // Now try these in order: // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) // 2. CODESET from lang if it contains a .CODESET part // 3. ctype (maybe the locale is named "ISO-8859-1" or something) // 4. locale (ditto) // 5. check for "@euro" // 6. guess locale from ctype unless ctype is "C" // 7. guess locale from lang // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) char * codeset = ctype ? strchr( ctype, '.' ) : 0; if ( codeset && *codeset == '.' ) localeMapper = checkForCodec( codeset + 1 ); // 2. CODESET from lang if it contains a .CODESET part codeset = lang ? strchr( lang, '.' ) : 0; if ( !localeMapper && codeset && *codeset == '.' ) localeMapper = checkForCodec( codeset + 1 ); // 3. ctype (maybe the locale is named "ISO-8859-1" or something) if ( !localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0 ) localeMapper = checkForCodec( ctype ); // 4. locale (ditto) if ( !localeMapper && lang && *lang != 0 ) localeMapper = checkForCodec( lang ); // 5. "@euro" if ( ( !localeMapper && ctype && strstr( ctype, "@euro" ) ) || (lang && strstr( lang, "@euro" ) ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-15" ); // 6. guess locale from ctype unless ctype is "C" // 7. guess locale from lang char * try_by_name = ctype; if ( ctype && *ctype != 0 && strcmp (ctype, "C") != 0 ) try_by_name = lang; // Now do the guessing. 
if ( lang && *lang && !localeMapper && try_by_name && *try_by_name ) { if ( try_locale_list( iso8859_15locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-15" ); else if ( try_locale_list( iso8859_2locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-2" ); else if ( try_locale_list( iso8859_3locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-3" ); else if ( try_locale_list( iso8859_4locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-4" ); else if ( try_locale_list( iso8859_5locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-5" ); else if ( try_locale_list( iso8859_6locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-6" ); else if ( try_locale_list( iso8859_7locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-7" ); else if ( try_locale_list( iso8859_8locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-8-I" ); else if ( try_locale_list( iso8859_9locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-9" ); else if ( try_locale_list( iso8859_13locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-13" ); else if ( try_locale_list( tis_620locales, lang ) ) localeMapper = TQTextCodec::codecForName( "ISO 8859-11" ); else if ( try_locale_list( koi8_ulocales, lang ) ) localeMapper = TQTextCodec::codecForName( "KOI8-U" ); else if ( try_locale_list( cp_1251locales, lang ) ) localeMapper = TQTextCodec::codecForName( "CP 1251" ); else if ( try_locale_list( pt_154locales, lang ) ) localeMapper = TQTextCodec::codecForName( "PT 154" ); else if ( try_locale_list( probably_koi8_rlocales, lang ) ) localeMapper = ru_RU_hack( lang ); } delete [] ctype; delete [] lang; } if ( localeMapper && localeMapper->mibEnum() == 11 ) localeMapper = TQTextCodec::codecForName( "ISO 8859-8-I" ); // If everything failed, we default to 8859-1 // We could perhaps default to 8859-15. 
if ( !localeMapper ) localeMapper = TQTextCodec::codecForName( "ISO 8859-1" ); #endif } static void setup() { #if defined(QT_CHECK_STATE) if ( codecs_destroyed ) { // If codecs have been destroyed, the application is being destroyed. // Do not create new codecs since this could lead to SEGV while trying to // print a message using tqWarning/tqDebug/tqFatal at this stage // // Note: the use of `printf` instead of `tqWarning` is intentional. We should never // get to this line of code. If we do, we are in some strange exception that we // didn't think of. Using `tqWarning` could potentially lead to an infinite loop with // `tqWarning` trying to setup codecs and this method calling `tqWarning` again. // Using `printf` makes sure this never happens, even for exceptions we didn't foresee. printf("TQTextCodec: setup() called when codecs have already been destroyed\n"); fflush(stdout); return; } #endif if ( all ) return; #ifdef TQT_THREAD_SUPPORT TQMutexLocker locker( tqt_global_mutexpool ? tqt_global_mutexpool->get( &all ) : 0 ); #endif // TQT_THREAD_SUPPORT all = new TQValueList; (void)new TQLatin1Codec; (void)new TQLatin15Codec; (void)new TQUtf8Codec; (void)new TQUtf16Codec; #ifndef TQT_NO_CODECS int i = 0; do { (void)new TQSimpleTextCodec( i ); } while( unicodevalues[i++].mib != LAST_MIB ); (void)new TQTsciiCodec; for (i = 0; i < 9; ++i) { (void)new TQIsciiCodec(i); } #endif // TQT_NO_CODECS #ifndef TQT_NO_CODEC_HEBREW (void)new TQHebrewCodec; #endif #ifndef TQT_NO_BIG_CODECS (void)new TQBig5Codec; (void)new TQBig5hkscsCodec; (void)new TQEucJpCodec; (void)new TQEucKrCodec; (void)new TQGb2312Codec; (void)new TQGbkCodec; (void)new TQGb18030Codec; (void)new TQJisCodec; (void)new TQSjisCodec; #endif // TQT_NO_BIG_CODECS #ifdef Q_OS_WIN32 (void) new TQWindowsLocalCodec; #endif // Q_OS_WIN32 if ( !localeMapper ) setupLocaleMapper(); } void TQTextCodec::fromUnicodeInternal( const TQChar *in, unsigned short *out, int length ) { switch( mibEnum() ) { #ifndef TQT_NO_CODECS case 
2084: case 2088: case 5: case 6: case 7: case 8: case 82: case 10: case 85: case 12: case 13: case 109: case 110: case 2004: case 2009: case 2086: case 2250: case 2251: case 2252: case 2253: case 2254: case 2255: case 2256: case 2257: case 2258: case 2259: ((TQSimpleTextCodec *)this)->fromUnicode( in, out, length ); break; #if !defined(TQT_NO_BIG_CODECS) && defined(TQ_WS_X11) // the TQFont*Codecs are only used on X11 case 15: ((TQFontJis0201Codec *) this)->fromUnicode( in, out, length ); break; case 63: ((TQFontJis0208Codec *) this)->fromUnicode( in, out, length ); break; case 36: ((TQFontKsc5601Codec *) this)->fromUnicode( in, out, length ); break; case 57: ((TQFontGb2312Codec *) this)->fromUnicode( in, out, length ); break; case -113: ((TQFontGbkCodec *) this)->fromUnicode( in, out, length ); break; case -114: ((TQFontGb18030_0Codec *) this)->fromUnicode( in, out, length ); break; case -2026: ((TQFontBig5Codec *) this)->fromUnicode( in, out, length ); break; case -2101: ((TQFontBig5hkscsCodec *) this)->fromUnicode( in, out, length ); break; case -4242: ((TQFontLaoCodec *) this)->fromUnicode( in, out, length ); break; #endif #endif // TQT_NO_CODECS case 4: ((TQLatin1Codec *) this)->fromUnicode( in, out, length ); break; case 111: ((TQLatin15Codec *) this)->fromUnicode( in, out, length ); break; default: { TQConstString string( in, length ); TQString str = string.string(); for ( int i = 0; i < length; i++ ) out[i] = characterFromUnicode( str, i ); } } } /*! \fn TQTextCodec* TQTextCodec::codecForTr() Returns the codec used by TQObject::tr() on its argument. If this function returns 0 (the default), tr() assumes Latin-1. \sa setCodecForTr() */ /*! \fn void TQTextCodec::setCodecForTr(TQTextCodec *c) \nonreentrant Sets the codec used by TQObject::tr() on its argument to \a c. If \a c is 0 (the default), tr() assumes Latin-1. If the literal quoted text in the program is not in the Latin-1 encoding, this function can be used to set the appropriate encoding. 
For example, software developed by Korean programmers might use eucKR for all the text in the program, in which case the main() function might look like this: \code int main(int argc, char** argv) { TQApplication app(argc, argv); ... install any additional codecs ... TQTextCodec::setCodecForTr( TQTextCodec::codecForName("eucKR") ); ... } \endcode Note that this is not the way to select the encoding that the \e user has chosen. For example, to convert an application containing literal English strings to Korean, all that is needed is for the English strings to be passed through tr() and for translation files to be loaded. For details of internationalization, see the \link i18n.html TQt internationalization documentation\endlink. \sa codecForTr(), setCodecForTr(), setCodecForCStrings() */ /*! \fn TQTextCodec* TQTextCodec::codecForCStrings() Returns the codec used by TQString to convert to and from const char* and TQCStrings. If this function returns 0 (the default), TQString assumes Latin-1. \sa setCodecForCStrings() */ /*! \fn void TQTextCodec::setCodecForCStrings(TQTextCodec *c) \nonreentrant Sets the codec used by TQString to convert to and from const char* and TQCStrings. If \a c is 0 (the default), TQString assumes Latin-1. \warning Some codecs do not preserve the characters in the ascii range (0x00 to 0x7f). For example, the Japanese Shift-JIS encoding maps the backslash character (0x5a) to the Yen character. This leads to unexpected results when using the backslash character to escape characters in strings used in e.g. regular expressions. Use TQString::fromLatin1() to preserve characters in the ascii range when needed. \sa codecForCStrings(), setCodecForTr(), setCodecForCStrings() */ TQTextCodec *TQTextCodec::cftr = 0; TQTextCodec *TQTextCodec::cfcs = 0; #endif // TQT_NO_TEXTCODEC