/*
   Copyright (C) 2010 Timothy Pearson

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of
   the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library; see the file COPYING.LIB. If not,
   write to the Free Software Foundation, Inc., 51 Franklin Street,
   Fifth Floor, Boston, MA 02110-1301, USA.
*/

// NOTE(review): both #include directives below have lost their header
// names (angle-bracketed text appears to have been stripped from this
// copy of the file) -- restore them from the upstream source.
#include
#include

#ifdef USE_QT4

// returns a string containing the letters and numbers from input,
// with a space separating run of a character class. e.g. "iso8859-1"
// becomes "iso 8859 1"
//
// Letters are lower-cased on output; characters that are neither
// letters nor numbers are not copied. A null or empty input gives an
// empty string. The result is passed through simplifyWhiteSpace()
// before being returned.
static QString lettersAndNumbers( const char * input )
{
    QString result;
    QChar c;

    while( input && *input ) {
	c = *input;
	if ( c.isLetter() || c.isNumber() )
	    result += c.lower();
	if ( input[1] ) {
	    // add space at character class transition, except
	    // transition from upper-case to lower-case letter
	    QChar n( input[1] );
	    if ( c.isLetter() && n.isLetter() ) {
		// between two letters, only lower-case followed by
		// upper-case inserts a space
		if ( c == c.lower() && n == n.upper() )
		    result += ' ';
	    } else if ( c.category() != n.category() ) {
		result += ' ';
	    }
	}
	input++;
    }
    return result.simplifyWhiteSpace();
}

// Value stored in QMultiByteUnicodeTable::unicode by the charmap parser
// for an entry that continues into a multiByte sub-table.
#define CHAINED 0xffff

// One entry of a 256-entry byte-to-Unicode table. Entries may own a
// further 256-entry table, indexed by the following input byte.
struct QMultiByteUnicodeTable {
    // If multiByte, ignore unicode and index into multiByte
    // with the next character.
    // A new entry starts as unicode 0xfffd with no sub-table.
    QMultiByteUnicodeTable() : unicode(0xfffd), multiByte(0) { }

    // Frees the owned sub-table array, if any.
    ~QMultiByteUnicodeTable()
    {
	if ( multiByte )
	    delete [] multiByte;
    }

    ushort unicode;
    QMultiByteUnicodeTable* multiByte;
};

// Parses one escaped byte value at cursor and advances cursor past it.
//
// cursor[0] is not inspected beyond the NUL check (the caller in this
// file only calls this when *cursor is the escape character).
// cursor[1] selects the base: 'x' -> 16, 'd' -> 10, anything else -> 8.
// strtol() then parses from cursor+2 and moves cursor to the first
// character it did not consume.
//
// Returns the parsed value masked to 8 bits, or 0 (cursor unchanged)
// when *cursor is NUL.
//
// NOTE(review): the octal branch also starts parsing at cursor+2, so the
// character at cursor[1] is not part of the parsed number -- confirm
// this is intended.
static int getByte(char* &cursor)
{
    int byte = 0;
    if ( *cursor ) {
	if ( cursor[1] == 'x' )
	    byte = strtol(cursor+2,&cursor,16);
	else if ( cursor[1] == 'd' )
	    byte = strtol(cursor+2,&cursor,10);
	else
	    byte = strtol(cursor+2,&cursor,8);
    }
    return byte&0xff;
}

class QTextCodecFromIOD;

// Decoder created by QTextCodecFromIOD::makeDecoder() when the codec is
// not stateless(). It keeps its position in the codec's multi-byte
// tables in `mb` between calls to convertToUnicode().
class QTextCodecFromIODDecoder : public QTextDecoder {
    const QTextCodecFromIOD* codec;	// codec whose tables are walked
    QMultiByteUnicodeTable* mb;		// table to index with the next byte
public:
    QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);
    //QString toUnicode(const char* chars, int len);
    QString convertToUnicode(const char* chars, int len, int *state);
};

// Text codec whose conversion tables are filled at construction time
// from a charmap description read from a QIODevice.
class QTextCodecFromIOD : public QTextCodec {
    friend class QTextCodecFromIODDecoder;

    // Codec name as read from the charmap header; returned by qtio_name().
    TQCString n;

    // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte,
    // use from_unicode_page_multiByte[row()][cell()] as string.
    char** from_unicode_page;
    char*** from_unicode_page_multiByte;

    // Set to '?' at the end of the constructor.
    char unkn;

    // Only one of these is used
    ushort* to_unicode;
    QMultiByteUnicodeTable* to_unicode_multiByte;

    // Largest number of bytes seen in one multi-byte mapping (at least 1).
    int max_bytes_per_char;

    // Names collected from "<comm> alias ..." lines of the charmap.
    TQStrList aliases;

    // True while no multi-byte mapping has been loaded.
    bool stateless() const { return !to_unicode_multiByte; }

public:
    // Reads the charmap from iod line by line (at most maxlen-1 bytes are
    // requested per readLine() call) and fills the tables. Reading stops
    // at "END CHARMAP" or when readLine() returns a value <= 0.
    // from_unicode_page stays 0 -- and ok() returns false -- if no
    // "CHARMAP" line was seen.
    QTextCodecFromIOD(QIODevice* iod)
    {
	from_unicode_page = 0;
	to_unicode_multiByte = 0;
	to_unicode = 0;
	from_unicode_page_multiByte = 0;
	max_bytes_per_char = 1;
	const int maxlen=100;
	char line[maxlen];
	char esc='\\';		// escape character, may be overridden below
	char comm='%';		// comment character, may be overridden below
	bool incmap = FALSE;	// TRUE once the "CHARMAP" line has been seen
	while (iod->readLine(line,maxlen) > 0) {
	    // NOTE(review): the three keyword literals below are empty or a
	    // single blank, although 15, 14 and 15 characters are compared.
	    // The loadCharmap() documentation in this file lists
	    // <code_set_name> and <escape_char>, so presumably these were
	    // "<code_set_name>", "<escape_char> " and "<comment_char> "
	    // before angle-bracketed text was stripped -- restore from the
	    // upstream source.
	    if (0==qstrnicmp(line,"",15))
		n = line+15;
	    else if (0==qstrnicmp(line," ",14))
		esc = line[14];
	    else if (0==qstrnicmp(line," ",15))
		comm = line[15];
	    else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
		// "<comm> alias <name>": keep everything after the keyword
		aliases.append(line+8);
	    } else if (0==qstrnicmp(line,"CHARMAP",7)) {
		if (!from_unicode_page) {
		    from_unicode_page = new char*[256];
		    for (int i=0; i<256; i++)
			from_unicode_page[i]=0;
		}
		if (!to_unicode) {
		    // NOTE(review): the 256 entries are not initialised
		    // here; only bytes that appear in the charmap are
		    // assigned later.
		    to_unicode = new ushort[256];
		}
		incmap = TRUE;
	    } else if (0==qstrnicmp(line,"END CHARMAP",11))
		break;
	    else if (incmap) {
		// One mapping line: scan it for escaped byte values and a
		// "<Uxxxx>" code point.
		char* cursor = line;
		int byte=-1,unicode=-1;
		ushort* mb_unicode=0;	// set only for multi-byte sequences
		const int maxmb=8; // more -> we'll need to improve datastructures
		char mb[maxmb+1];	// bytes of the sequence, NUL-terminated below
		int nmb=0;
		while (*cursor) {
		    if (cursor[0]=='<' && cursor[1]=='U' &&
			cursor[2]>='0' && cursor[2]<='9' &&
			cursor[3]>='0' && cursor[3]<='9') {
			// hexadecimal code point following "<U"
			unicode = strtol(cursor+2,&cursor,16);
		    } else if (*cursor==esc) {
			byte = getByte(cursor);
			if ( *cursor == esc ) {
			    // A second escaped byte follows directly:
			    // this is a multi-byte sequence.
			    if ( !to_unicode_multiByte ) {
				// First multi-byte sequence: move the flat
				// table into the multi-byte table and drop
				// the flat one.
				to_unicode_multiByte =
				    new QMultiByteUnicodeTable[256];
				for (int i=0; i<256; i++) {
				    to_unicode_multiByte[i].unicode =
					to_unicode[i];
				    to_unicode_multiByte[i].multiByte = 0;
				}
				delete [] to_unicode;
				to_unicode = 0;
			    }
			    QMultiByteUnicodeTable* mbut =
				to_unicode_multiByte+byte;
			    mb[nmb++] = byte;
			    // NOTE(review): nmb is not reset between
			    // sequences on the same line; if the loop below
			    // leaves nmb == maxmb, a later sequence on that
			    // line can push nmb past maxmb before mb is
			    // terminated -- confirm this cannot overrun mb.
			    while ( nmb < maxmb && *cursor == esc ) {
				// Always at least once
				mbut->unicode = CHAINED;
				byte = getByte(cursor);
				mb[nmb++] = byte;
				if (!mbut->multiByte) {
				    mbut->multiByte =
					new QMultiByteUnicodeTable[256];
				}
				mbut = mbut->multiByte+byte;
				mb_unicode = & mbut->unicode;
			    }
			    if ( nmb > max_bytes_per_char )
				max_bytes_per_char = nmb;
			}
		    } else {
			cursor++;
		    }
		}
		if (unicode >= 0 && unicode <= 0xffff)
		{
		    QChar ch((ushort)unicode);
		    if (!from_unicode_page[ch.row()]) {
			from_unicode_page[ch.row()] = new char[256];
			for (int i=0; i<256; i++)
			    from_unicode_page[ch.row()][i]=0;
		    }
		    if ( mb_unicode ) {
			// Multi-byte mapping: the single-byte slot is
			// cleared and the byte string is stored separately.
			from_unicode_page[ch.row()][ch.cell()] = 0;
			if (!from_unicode_page_multiByte) {
			    from_unicode_page_multiByte = new char**[256];
			    for (int i=0; i<256; i++)
				from_unicode_page_multiByte[i]=0;
			}
			if (!from_unicode_page_multiByte[ch.row()]) {
			    from_unicode_page_multiByte[ch.row()] =
				new char*[256];
			    for (int i=0; i<256; i++)
				from_unicode_page_multiByte[ch.row()][i] = 0;
			}
			mb[nmb++] = 0;
			from_unicode_page_multiByte[ch.row()][ch.cell()]
			    = qstrdup(mb);
			*mb_unicode = unicode;
		    } else {
			// NOTE(review): byte is still -1 here if the line had
			// a code point but no escaped byte; it is then used
			// as an index below -- confirm.
			from_unicode_page[ch.row()][ch.cell()] = (char)byte;
			if ( to_unicode )
			    to_unicode[byte] = unicode;
			else
			    to_unicode_multiByte[byte].unicode = unicode;
		    }
		} else {
		}
	    }
	}
	n = n.stripWhiteSpace();

	unkn = '?'; // ##### Might be a bad choice.
    }

    // Frees the per-row arrays and strings, and both to-Unicode tables.
    // NOTE(review): the outer from_unicode_page array, the outer
    // from_unicode_page_multiByte array and its per-row pointer arrays
    // are not deleted here -- looks like a leak, confirm.
    ~QTextCodecFromIOD()
    {
	if ( from_unicode_page ) {
	    for (int i=0; i<256; i++)
		if (from_unicode_page[i])
		    delete [] from_unicode_page[i];
	}
	if ( from_unicode_page_multiByte ) {
	    for (int i=0; i<256; i++)
		if (from_unicode_page_multiByte[i])
		    for (int j=0; j<256; j++)
			if (from_unicode_page_multiByte[i][j])
			    delete [] from_unicode_page_multiByte[i][j];
	}
	if ( to_unicode )
	    delete [] to_unicode;
	if ( to_unicode_multiByte )
	    delete [] to_unicode_multiByte;
    }

    // True if a "CHARMAP" line was seen while loading.
    bool ok() const
    {
	return !!from_unicode_page;
    }

    // Returns the base-class decoder for a stateless codec, otherwise a
    // new QTextCodecFromIODDecoder bound to this codec.
    QTextDecoder* makeDecoder() const
    {
	if ( stateless() )
	    return QTextCodec::makeDecoder();
	else
	    return new QTextCodecFromIODDecoder(this);
    }

    // Returns the name read from the charmap header.
    const char* qtio_name() const
    {
	return n;
    }

    int mibEnum() const
    {
	return 0; // #### Unknown.
    }

    // Content-based detection is not implemented: always returns 0.
    int heuristicContentMatch(const char*, int) const
    {
	return 0;
    }

    // Returns the best score among the base-class match and
    // simpleHeuristicNameMatch() of every alias against hint.
    int heuristicNameMatch(const char* hint) const
    {
	int bestr = QTextCodec::heuristicNameMatch(hint);
	TQStrListIterator it(aliases);
	char* a;
	while ((a=it.current())) {
	    ++it;
	    int r = simpleHeuristicNameMatch(a,hint);
	    if (r > bestr)
		bestr = r;
	}
	return bestr;
    }

    // Converts len bytes from chars to Unicode. With multi-byte tables,
    // bytes are followed through sub-tables until an entry without a
    // sub-table is reached; lookup then restarts at the top-level table.
    // A sequence left incomplete at the end of the input produces no
    // output. Without multi-byte tables each byte maps through to_unicode.
    QString toUnicode(const char* chars, int len) const
    {
	const uchar* uchars = (const uchar*)chars;
	QString result;
	QMultiByteUnicodeTable* multiByte=to_unicode_multiByte;
	if ( multiByte ) {
	    while (len--) {
		QMultiByteUnicodeTable& mb = multiByte[*uchars];
		if ( mb.multiByte ) {
		    // Chained multi-byte
		    multiByte = mb.multiByte;
		} else {
		    result += QChar(mb.unicode);
		    multiByte=to_unicode_multiByte;
		}
		uchars++;
	    }
	} else {
	    while (len--)
		result += QChar(to_unicode[*uchars++]);
	}
	return result;
    }

    // Forwards to toUnicode(); the state argument is not used.
    QString convertToUnicode(const char* chars, int len, ConverterState *state) const
    {
	return toUnicode(chars, len);
    }

#if !defined(Q_NO_USING_KEYWORD)
    using QTextCodec::fromUnicode;
#endif
    // Converts up to lenInOut characters of uc to bytes. lenInOut is
    // clamped to uc.length(), and the output buffer is sized for
    // lenInOut*max_bytes_per_char bytes plus one.
    TQCString fromUnicode(const QString& uc, int& lenInOut) const
    {
	if (lenInOut > (int)uc.length())
	    lenInOut = uc.length();
	int rlen = lenInOut*max_bytes_per_char;
	TQCString rstr(rlen+1);
	char* cursor = rstr.data();
	char* s=0;
	int l = lenInOut;
	int lout = 0;
	// NOTE(review): the source is damaged from this point up to the
	// "// }" below. Everything between "i" and "to_unicode_multiByte"
	// is missing from this copy: the rest of this loop and function,
	// the end of the class, and the definition (or commented-out
	// definition) of the QTextCodecFromIODDecoder constructor, which is
	// where the decoder's `mb` member would be initialised. Restore it
	// from the upstream source; it is reproduced here unchanged.
	for (int i=0; ito_unicode_multiByte;
// }

// Decodes len bytes from chars using the codec's multi-byte tables. The
// current table is kept in the member `mb`, so a multi-byte sequence may
// be split across calls. Entries whose unicode value is 0 produce no
// output. The state argument is not used.
QString QTextCodecFromIODDecoder::convertToUnicode(const char* chars, int len, int *state)
{
    const uchar* uchars = (const uchar*)chars;
    QString result;
    while (len--) {
	QMultiByteUnicodeTable& t = mb[*uchars];
	if ( t.multiByte ) {
	    // Chained multi-byte
	    mb = t.multiByte;
	} else {
	    if ( t.unicode )
		result += QChar(t.unicode);
	    // sequence finished: restart at the top-level table
	    mb=codec->to_unicode_multiByte;
	}
	uchars++;
    }
    return result;
}

#ifndef QT_NO_CODECS
// Cannot use <pre> or \code
// (the word after "use" was lost in this copy; <pre> is the presumed original)
/*!
    Reads a POSIX2 charmap definition from \a iod.
    The parser recognizes the following lines:


  <code_set_name> name
  <escape_char> character
  % alias alias
  CHARMAP
  <token> /xhexbyte <Uunicode> ...
  <token> /ddecbyte <Uunicode> ...
  <token> /octbyte <Uunicode> ...
  <token> /any/any... <Uunicode> ...
  END CHARMAP
The resulting QTextCodec is returned (and also added to the global list of codecs). The name() of the result is taken from the code_set_name. Note that a codec constructed in this way uses much more memory and is slower than a hand-written QTextCodec subclass, since tables in code are kept in memory shared by all Qt applications. \sa loadCharmapFile() */ QTextCodec* QTextCodec::loadCharmap(QIODevice* iod) { QTextCodecFromIOD* r = new QTextCodecFromIOD(iod); if ( !r->ok() ) { delete r; r = 0; } return r; } /*! A convenience function for loadCharmap() that loads the charmap definition from the file \a filename. */ QTextCodec* QTextCodec::loadCharmapFile(QString filename) { QFile f(filename); if (f.open(IO_ReadOnly)) { QTextCodecFromIOD* r = new QTextCodecFromIOD(&f); if ( !r->ok() ) delete r; else return r; } return 0; } /*! Returns a value indicating how likely it is that this decoder is appropriate for decoding some format that has the given name. The name is compared with the \a hint. A good match returns a positive number around the length of the string. A bad match is negative. The default implementation calls simpleHeuristicNameMatch() with the name of the codec. */ int QTextCodec::heuristicNameMatch(const char* hint) const { return simpleHeuristicNameMatch(name(),hint); } /*! A simple utility function for heuristicNameMatch(): it does some very minor character-skipping so that almost-exact matches score high. \a name is the text we're matching and \a hint is used for the comparison. */ int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint) { // if they're the same, return a perfect score. if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 ) return qstrlen( hint ); // if the letters and numbers are the same, we have an "almost" // perfect match. 
QString h( lettersAndNumbers( hint ) ); QString n( lettersAndNumbers( name ) ); if ( h == n ) return qstrlen( hint )-1; if ( h.stripWhiteSpace() == n.stripWhiteSpace() ) return qstrlen( hint )-2; // could do some more here, but I don't think it's worth it return 0; } #endif //QT_NO_CODECS #endif // USE_QT4