summaryrefslogtreecommitdiffstats
path: root/qtinterface/tqtextcodec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'qtinterface/tqtextcodec.cpp')
-rw-r--r--qtinterface/tqtextcodec.cpp492
1 files changed, 492 insertions, 0 deletions
diff --git a/qtinterface/tqtextcodec.cpp b/qtinterface/tqtextcodec.cpp
index 7958168..6e047a5 100644
--- a/qtinterface/tqtextcodec.cpp
+++ b/qtinterface/tqtextcodec.cpp
@@ -21,3 +21,495 @@ Boston, MA 02110-1301, USA.
#include <tqt.h>
#include <tqtextcodec.h>
+
+#ifdef USE_QT4
+
+// returns a string containing the letters and numbers from input,
+// with a space separating run of a character class. e.g. "iso8859-1"
+// becomes "iso 8859 1"
+static QString lettersAndNumbers( const char * input )
+{
+ QString result;
+ QChar c;
+
+ while( input && *input ) {
+ c = *input;
+ if ( c.isLetter() || c.isNumber() )
+ result += c.lower();
+ if ( input[1] ) {
+ // add space at character class transition, except
+ // transition from upper-case to lower-case letter
+ QChar n( input[1] );
+ if ( c.isLetter() && n.isLetter() ) {
+ if ( c == c.lower() && n == n.upper() )
+ result += ' ';
+ } else if ( c.category() != n.category() ) {
+ result += ' ';
+ }
+ }
+ input++;
+ }
+ return result.simplifyWhiteSpace();
+}
+
+#define CHAINED 0xffff
+
+struct QMultiByteUnicodeTable {
+ // If multiByte, ignore unicode and index into multiByte
+ // with the next character.
+ QMultiByteUnicodeTable() : unicode(0xfffd), multiByte(0) { }
+
+ ~QMultiByteUnicodeTable()
+ {
+ if ( multiByte )
+ delete [] multiByte;
+ }
+
+ ushort unicode;
+ QMultiByteUnicodeTable* multiByte;
+};
+
+static int getByte(char* &cursor)
+{
+ int byte = 0;
+ if ( *cursor ) {
+ if ( cursor[1] == 'x' )
+ byte = strtol(cursor+2,&cursor,16);
+ else if ( cursor[1] == 'd' )
+ byte = strtol(cursor+2,&cursor,10);
+ else
+ byte = strtol(cursor+2,&cursor,8);
+ }
+ return byte&0xff;
+}
+
+class QTextCodecFromIOD;
+
+class QTextCodecFromIODDecoder : public QTextDecoder {
+ const QTextCodecFromIOD* codec;
+ QMultiByteUnicodeTable* mb;
+public:
+ QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);
+ //QString toUnicode(const char* chars, int len);
+ QString convertToUnicode(const char* chars, int len, int *state);
+};
+
+class QTextCodecFromIOD : public QTextCodec {
+ friend class QTextCodecFromIODDecoder;
+
+ TQCString n;
+
+ // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte,
+ // use from_unicode_page_multiByte[row()][cell()] as string.
+ char** from_unicode_page;
+ char*** from_unicode_page_multiByte;
+ char unkn;
+
+ // Only one of these is used
+ ushort* to_unicode;
+ QMultiByteUnicodeTable* to_unicode_multiByte;
+ int max_bytes_per_char;
+ TQStrList aliases;
+
+ bool stateless() const { return !to_unicode_multiByte; }
+
+public:
+ QTextCodecFromIOD(QIODevice* iod)
+ {
+ from_unicode_page = 0;
+ to_unicode_multiByte = 0;
+ to_unicode = 0;
+ from_unicode_page_multiByte = 0;
+ max_bytes_per_char = 1;
+
+ const int maxlen=100;
+ char line[maxlen];
+ char esc='\\';
+ char comm='%';
+ bool incmap = FALSE;
+ while (iod->readLine(line,maxlen) > 0) {
+ if (0==qstrnicmp(line,"<code_set_name>",15))
+ n = line+15;
+ else if (0==qstrnicmp(line,"<escape_char> ",14))
+ esc = line[14];
+ else if (0==qstrnicmp(line,"<comment_char> ",15))
+ comm = line[15];
+ else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
+ aliases.append(line+8);
+ } else if (0==qstrnicmp(line,"CHARMAP",7)) {
+ if (!from_unicode_page) {
+ from_unicode_page = new char*[256];
+ for (int i=0; i<256; i++)
+ from_unicode_page[i]=0;
+ }
+ if (!to_unicode) {
+ to_unicode = new ushort[256];
+ }
+ incmap = TRUE;
+ } else if (0==qstrnicmp(line,"END CHARMAP",11))
+ break;
+ else if (incmap) {
+ char* cursor = line;
+ int byte=-1,unicode=-1;
+ ushort* mb_unicode=0;
+ const int maxmb=8; // more -> we'll need to improve datastructures
+ char mb[maxmb+1];
+ int nmb=0;
+
+ while (*cursor) {
+ if (cursor[0]=='<' && cursor[1]=='U' &&
+ cursor[2]>='0' && cursor[2]<='9' &&
+ cursor[3]>='0' && cursor[3]<='9') {
+
+ unicode = strtol(cursor+2,&cursor,16);
+
+ } else if (*cursor==esc) {
+
+ byte = getByte(cursor);
+
+ if ( *cursor == esc ) {
+ if ( !to_unicode_multiByte ) {
+ to_unicode_multiByte =
+ new QMultiByteUnicodeTable[256];
+ for (int i=0; i<256; i++) {
+ to_unicode_multiByte[i].unicode =
+ to_unicode[i];
+ to_unicode_multiByte[i].multiByte = 0;
+ }
+ delete [] to_unicode;
+ to_unicode = 0;
+ }
+ QMultiByteUnicodeTable* mbut =
+ to_unicode_multiByte+byte;
+ mb[nmb++] = byte;
+ while ( nmb < maxmb && *cursor == esc ) {
+ // Always at least once
+
+ mbut->unicode = CHAINED;
+ byte = getByte(cursor);
+ mb[nmb++] = byte;
+ if (!mbut->multiByte) {
+ mbut->multiByte =
+ new QMultiByteUnicodeTable[256];
+ }
+ mbut = mbut->multiByte+byte;
+ mb_unicode = & mbut->unicode;
+ }
+
+ if ( nmb > max_bytes_per_char )
+ max_bytes_per_char = nmb;
+ }
+ } else {
+ cursor++;
+ }
+ }
+
+ if (unicode >= 0 && unicode <= 0xffff)
+ {
+ QChar ch((ushort)unicode);
+ if (!from_unicode_page[ch.row()]) {
+ from_unicode_page[ch.row()] = new char[256];
+ for (int i=0; i<256; i++)
+ from_unicode_page[ch.row()][i]=0;
+ }
+ if ( mb_unicode ) {
+ from_unicode_page[ch.row()][ch.cell()] = 0;
+ if (!from_unicode_page_multiByte) {
+ from_unicode_page_multiByte = new char**[256];
+ for (int i=0; i<256; i++)
+ from_unicode_page_multiByte[i]=0;
+ }
+ if (!from_unicode_page_multiByte[ch.row()]) {
+ from_unicode_page_multiByte[ch.row()] = new char*[256];
+ for (int i=0; i<256; i++)
+ from_unicode_page_multiByte[ch.row()][i] = 0;
+ }
+ mb[nmb++] = 0;
+ from_unicode_page_multiByte[ch.row()][ch.cell()]
+ = qstrdup(mb);
+ *mb_unicode = unicode;
+ } else {
+ from_unicode_page[ch.row()][ch.cell()] = (char)byte;
+ if ( to_unicode )
+ to_unicode[byte] = unicode;
+ else
+ to_unicode_multiByte[byte].unicode = unicode;
+ }
+ } else {
+ }
+ }
+ }
+ n = n.stripWhiteSpace();
+
+ unkn = '?'; // ##### Might be a bad choice.
+ }
+
+ ~QTextCodecFromIOD()
+ {
+ if ( from_unicode_page ) {
+ for (int i=0; i<256; i++)
+ if (from_unicode_page[i])
+ delete [] from_unicode_page[i];
+ }
+ if ( from_unicode_page_multiByte ) {
+ for (int i=0; i<256; i++)
+ if (from_unicode_page_multiByte[i])
+ for (int j=0; j<256; j++)
+ if (from_unicode_page_multiByte[i][j])
+ delete [] from_unicode_page_multiByte[i][j];
+ }
+ if ( to_unicode )
+ delete [] to_unicode;
+ if ( to_unicode_multiByte )
+ delete [] to_unicode_multiByte;
+ }
+
+ bool ok() const
+ {
+ return !!from_unicode_page;
+ }
+
+ QTextDecoder* makeDecoder() const
+ {
+ if ( stateless() )
+ return QTextCodec::makeDecoder();
+ else
+ return new QTextCodecFromIODDecoder(this);
+ }
+
+ const char* qtio_name() const
+ {
+ return n;
+ }
+
+ int mibEnum() const
+ {
+ return 0; // #### Unknown.
+ }
+
+ int heuristicContentMatch(const char*, int) const
+ {
+ return 0;
+ }
+
+ int heuristicNameMatch(const char* hint) const
+ {
+ int bestr = QTextCodec::heuristicNameMatch(hint);
+ TQStrListIterator it(aliases);
+ char* a;
+ while ((a=it.current())) {
+ ++it;
+ int r = simpleHeuristicNameMatch(a,hint);
+ if (r > bestr)
+ bestr = r;
+ }
+ return bestr;
+ }
+
+ QString toUnicode(const char* chars, int len) const
+ {
+ const uchar* uchars = (const uchar*)chars;
+ QString result;
+ QMultiByteUnicodeTable* multiByte=to_unicode_multiByte;
+ if ( multiByte ) {
+ while (len--) {
+ QMultiByteUnicodeTable& mb = multiByte[*uchars];
+ if ( mb.multiByte ) {
+ // Chained multi-byte
+ multiByte = mb.multiByte;
+ } else {
+ result += QChar(mb.unicode);
+ multiByte=to_unicode_multiByte;
+ }
+ uchars++;
+ }
+ } else {
+ while (len--)
+ result += QChar(to_unicode[*uchars++]);
+ }
+ return result;
+ }
+
+ QString convertToUnicode(const char* chars, int len, ConverterState *state) const
+ {
+ return toUnicode(chars, len);
+ }
+
+#if !defined(Q_NO_USING_KEYWORD)
+ using QTextCodec::fromUnicode;
+#endif
+ TQCString fromUnicode(const QString& uc, int& lenInOut) const
+ {
+ if (lenInOut > (int)uc.length())
+ lenInOut = uc.length();
+ int rlen = lenInOut*max_bytes_per_char;
+ TQCString rstr(rlen+1);
+ char* cursor = rstr.data();
+ char* s=0;
+ int l = lenInOut;
+ int lout = 0;
+ for (int i=0; i<l; i++) {
+ QChar ch = uc[i];
+ if ( ch == QChar() ) {
+ // special
+ *cursor++ = 0;
+ } else if ( from_unicode_page[ch.row()] &&
+ from_unicode_page[ch.row()][ch.cell()] )
+ {
+ *cursor++ = from_unicode_page[ch.row()][ch.cell()];
+ lout++;
+ } else if ( from_unicode_page_multiByte &&
+ from_unicode_page_multiByte[ch.row()] &&
+ (s=from_unicode_page_multiByte[ch.row()][ch.cell()]) )
+ {
+ while (*s) {
+ *cursor++ = *s++;
+ lout++;
+ }
+ } else {
+ *cursor++ = unkn;
+ lout++;
+ }
+ }
+ *cursor = 0;
+ lenInOut = lout;
+ return rstr;
+ }
+
+ QByteArray convertFromUnicode(const QChar *charin, int len, ConverterState *state) const
+ {
+ return fromUnicode(charin, len);
+ }
+
+ QByteArray name() const
+ {
+ return qtio_name();
+ }
+};
+
+// QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) :
+// codec(c)
+// {
+// mb = codec->to_unicode_multiByte;
+// }
+
+QString QTextCodecFromIODDecoder::convertToUnicode(const char* chars, int len, int *state)
+{
+ const uchar* uchars = (const uchar*)chars;
+ QString result;
+ while (len--) {
+ QMultiByteUnicodeTable& t = mb[*uchars];
+ if ( t.multiByte ) {
+ // Chained multi-byte
+ mb = t.multiByte;
+ } else {
+ if ( t.unicode )
+ result += QChar(t.unicode);
+ mb=codec->to_unicode_multiByte;
+ }
+ uchars++;
+ }
+ return result;
+}
+
+#ifndef QT_NO_CODECS
+// Cannot use <pre> or \code
+/*!
+ Reads a POSIX2 charmap definition from \a iod.
+ The parser recognizes the following lines:
+
+<font name="sans">
+&nbsp;&nbsp;&lt;code_set_name&gt; <i>name</i></br>
+&nbsp;&nbsp;&lt;escape_char&gt; <i>character</i></br>
+&nbsp;&nbsp;% alias <i>alias</i></br>
+&nbsp;&nbsp;CHARMAP</br>
+&nbsp;&nbsp;&lt;<i>token</i>&gt; /x<i>hexbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
+&nbsp;&nbsp;&lt;<i>token</i>&gt; /d<i>decbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
+&nbsp;&nbsp;&lt;<i>token</i>&gt; /<i>octbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
+&nbsp;&nbsp;&lt;<i>token</i>&gt; /<i>any</i>/<i>any</i>... &lt;U<i>unicode</i>&gt; ...</br>
+&nbsp;&nbsp;END CHARMAP</br>
+</font>
+
+ The resulting QTextCodec is returned (and also added to the global
+ list of codecs). The name() of the result is taken from the
+ code_set_name.
+
+ Note that a codec constructed in this way uses much more memory
+ and is slower than a hand-written QTextCodec subclass, since
+ tables in code are kept in memory shared by all Qt applications.
+
+ \sa loadCharmapFile()
+*/
+QTextCodec* QTextCodec::loadCharmap(QIODevice* iod)
+{
+ QTextCodecFromIOD* r = new QTextCodecFromIOD(iod);
+ if ( !r->ok() ) {
+ delete r;
+ r = 0;
+ }
+ return r;
+}
+
+/*!
+ A convenience function for loadCharmap() that loads the charmap
+ definition from the file \a filename.
+*/
+QTextCodec* QTextCodec::loadCharmapFile(QString filename)
+{
+ QFile f(filename);
+ if (f.open(IO_ReadOnly)) {
+ QTextCodecFromIOD* r = new QTextCodecFromIOD(&f);
+ if ( !r->ok() )
+ delete r;
+ else
+ return r;
+ }
+ return 0;
+}
+
+/*!
+ Returns a value indicating how likely it is that this decoder is
+ appropriate for decoding some format that has the given name. The
+ name is compared with the \a hint.
+
+ A good match returns a positive number around the length of the
+ string. A bad match is negative.
+
+ The default implementation calls simpleHeuristicNameMatch() with
+ the name of the codec.
+*/
+int QTextCodec::heuristicNameMatch(const char* hint) const
+{
+ return simpleHeuristicNameMatch(name(),hint);
+}
+
+/*!
+ A simple utility function for heuristicNameMatch(): it does some
+ very minor character-skipping so that almost-exact matches score
+ high. \a name is the text we're matching and \a hint is used for
+ the comparison.
+*/
+int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
+{
+ // if they're the same, return a perfect score.
+ if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 )
+ return qstrlen( hint );
+
+ // if the letters and numbers are the same, we have an "almost"
+ // perfect match.
+ QString h( lettersAndNumbers( hint ) );
+ QString n( lettersAndNumbers( name ) );
+ if ( h == n )
+ return qstrlen( hint )-1;
+
+ if ( h.stripWhiteSpace() == n.stripWhiteSpace() )
+ return qstrlen( hint )-2;
+
+ // could do some more here, but I don't think it's worth it
+
+ return 0;
+}
+
+#endif //QT_NO_CODECS
+
+#endif // USE_QT4 \ No newline at end of file