diff options
Diffstat (limited to 'libkdepim/qutf7codec.cpp')
-rw-r--r-- | libkdepim/qutf7codec.cpp | 550 |
1 files changed, 0 insertions, 550 deletions
diff --git a/libkdepim/qutf7codec.cpp b/libkdepim/qutf7codec.cpp deleted file mode 100644 index dc8ef2a2f..000000000 --- a/libkdepim/qutf7codec.cpp +++ /dev/null @@ -1,550 +0,0 @@ -/* - qutf7codec.cpp - - A TQTextCodec for UTF-7 (rfc2152). - Copyright (c) 2001 Marc Mutz <mutz@kde.org> - See file COPYING for details - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License, version 2.0, - as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, US - - As a special exception, permission is granted to use this plugin - with any version of TQt by TrollTech AS, Norway. In this case, the - use of this plugin doesn't cause the resulting executable to be - covered by the GNU General Public License. - This exception does not however tqinvalidate any other reasons why the - executable file might be covered by the GNU General Public License. -*/ - - -#include "qutf7codec.h" - -#ifndef TQT_NO_TEXTCODEC - -int TQUtf7Codec::mibEnum() const { - return 1012; -} - -int TQStrictUtf7Codec::mibEnum() const { - return -1012; -} - -const char* TQUtf7Codec::name() const { - return "UTF-7"; -} - -const char* TQStrictUtf7Codec::name() const { - return "X-QT-UTF-7-STRICT"; -} - -const char* TQUtf7Codec::mimeName() const { - return "UTF-7"; -} - -bool TQUtf7Codec::canEncode( TQChar ) const { - return TRUE; -} - -bool TQUtf7Codec::canEncode( const TQString & ) const { - return TRUE; -} - -static uchar base64Set[] = { - 0x00, 0x00, 0x00, 0x00, // '\0' ... - 0x00, 0x11, 0xFF, 0xC0, // ' ' ... '?' - 0x7F, 0xFF, 0xFF, 0xE0, // '@' ... '_' - 0x7F, 0xFF, 0xFF, 0xE0 // '`' ... DEL -}; - -static uchar base64SetWithLastTwoBitsZero[] = { - 0x00, 0x00, 0x00, 0x00, // '\0' ... - 0x00, 0x00, 0x88, 0x80, // ' ' ... '?' - 0x44, 0x44, 0x44, 0x40, // '@' ... '_' - 0x11, 0x11, 0x11, 0x00 // '`' ... DEL -}; - -static uchar directSet[] = { - 0x00, 0x00, 0x00, 0x00, // '\0' ... - 0x01, 0xCF, 0xFF, 0xE1, // ' ' ... '?' - 0x7F, 0xFF, 0xFF, 0xE0, // '@' ... '_' - 0x7F, 0xFF, 0xFF, 0xE0 // '`' ... DEL -}; - -static uchar optDirectSet[] = { - 0x00, 0x00, 0x00, 0x00, // '\0' ... - 0x7E, 0x20, 0x00, 0x1E, // ' ' ... '?' - 0x80, 0x00, 0x00, 0x17, // '@' ... '_' - 0x80, 0x00, 0x00, 0x1C // '`' ... DEL -}; - -static inline bool isOfSet(uchar ch, uchar* set) { - return set[ ch/8 ] & (0x80 >> ( ch%8 )); -} - -int TQUtf7Codec::heuristicContentMatch(const char* chars, int len) const -{ - int stepNo = 0; - int i; - bool shifted = FALSE; - bool rightAfterEscape = FALSE; - bool onlyNullBitsSinceLastBoundary = TRUE; - for ( i = 0; i < len ; i++ ) { - if ((unsigned char)chars[i] >= 128) // 8bit chars not allowed. - break; - if (shifted) { - if ( isOfSet(chars[i],base64Set) ) { - switch (stepNo) { - case 0: - onlyNullBitsSinceLastBoundary = TRUE; - break; - case 3: - onlyNullBitsSinceLastBoundary - = isOfSet(chars[i],base64SetWithLastTwoBitsZero); - break; - case 6: - onlyNullBitsSinceLastBoundary - = ( chars[i] == 'A' || chars[i] == 'Q' || - chars[i] == 'g' || chars[i] == 'w' ); - break; - default: - onlyNullBitsSinceLastBoundary - = onlyNullBitsSinceLastBoundary && (chars[i] == 'A'); - } - stepNo = (stepNo + 1) % 8; - rightAfterEscape = FALSE; - } else { - if (rightAfterEscape && chars[i] != '-') - break; // a '+' must be followed by '-' or a base64 char - if (!onlyNullBitsSinceLastBoundary) - break; // non-zero bits in the tail of the base64 encoding - shifted = FALSE; - stepNo = 0; - } - } else { - if (chars[i] == '+') { - shifted = TRUE; - rightAfterEscape = TRUE; - } - } - } - return i; -} - -class TQUtf7Decoder : public TQTextDecoder { - // the storage for our tqunicode char until it's finished - ushort uc; - // the state of the base64 decoding - // can be 0 (just finished three tqunicode chars) - // 1 (have the upper 6 bits of uc already) - // 2 (have the upper 12 bits of uc already) - // 3 (have the upper 2 bits of uc already) - // .......... - // 7 (have the upper 10 bits of uc already) - // => n (have the upper (n * 6) % 16 bits of uc already) - // "stepNo" cycles through all it's values every three - // tqunicode chars. - char stepNo; - // remembers if we are in shifted-sequence mode - bool shifted; - // remembers if we're just after the initial '+' - // of a shifted-sequence. - bool rightAfterEscape; -public: - TQUtf7Decoder() : uc(0), stepNo(0), shifted(FALSE), rightAfterEscape(FALSE) - { - } - -private: - inline void resetParser() - { - uc = 0; - stepNo = 0; - shifted = FALSE; - rightAfterEscape = FALSE; - } - -public: - TQString toUnicode(const char* chars, int len) - { - TQString result = ""; - for (int i=0; i<len; i++) { - uchar ch = chars[i]; - - // - // check for 8bit char's: - // - if ( ch > 127 ) { - qWarning("TQUtf7Decoder: 8bit char found in input. " - "Parser has been re-initialized!"); - resetParser(); - result += TQChar::replacement; - continue; - } - - if (shifted) { // in shifted mode - - // - // first, we check specialities that only occur - // right after the escaping '+': - // - if ( rightAfterEscape && ch == '-' ) { - // a "+-" sequence is a short-circuit encoding - // for just '+': - resetParser(); - result += TQChar('+'); - // we're already done for this "ch", so - continue; - } - - // - // Here we're going to extract the bits represented by "ch": - // - ushort bits; - if ( ch >= 'A' && ch <= 'Z' ) { - bits = ch - 'A'; - } else if ( ch >= 'a' && ch <= 'z' ) { - bits = ch - 'a' + 26; - } else if ( ch >= '0' && ch <= '9' ) { - bits = ch - '0' + 52; - } else if ( ch == '+' ) { - bits = 62; - } else if ( ch == '/' ) { - bits = 63; - } else { - bits = 0; // keep compiler happy - - // - // ch is not of the base64 alphabet. - // Here we are going to check the sequence's validity: - // - if ( rightAfterEscape ) { - // any non-base64 char following an escaping '+' - // makes for an ill-formed sequence. - // Note that we catch (the valid) "+-" pair - // right at the beginning. - qWarning("TQUtf7Decoder: ill-formed input: " - "non-base64 char after escaping \"+\"!"); - } - // pending bits from base64 encoding must be all 0: - if (stepNo >= 1 && uc) { - qWarning("TQUtf7Decoder: ill-formed sequence: " - "non-zero bits in shifted-sequence tail!"); - } - resetParser(); - - // a '-' signifies the end of the shifted-sequence, - // so we just swallow it. - if ( ch == '-' ) - continue; - // end of validity checking. Process ch now... - } - - if ( /*still*/ shifted ) { - // - // now we're going to stuff the "bits" bit bucket into - // the right position inside "uc", emitting a resulting - // TQChar if possible. - // - switch (stepNo) { - // "bits" are the 6 msb's of uc - case 0: uc = bits << 10; break; - - case 1: uc |= bits << 4; break; - - // 4 bits of "bits" complete the first ushort - case 2: uc |= bits >> 2; result += TQChar(uc); - // 2 bits of "bits" make the msb's of the next ushort - uc = bits << 14; break; - case 3: uc |= bits << 8; break; - case 4: uc |= bits << 2; break; - - // 2 bits of "bits" complete the second ushort - case 5: uc |= bits >> 4; result += TQChar(uc); - // 4 bits of "bits" make the msb's of the next ushort - uc = bits << 12; break; - case 6: uc |= bits << 6; break; - - // these 6 bits complete the third ushort - // and also one round of 8 chars -> 3 ushort decoding - case 7: uc |= bits; result += TQChar(uc); - uc = 0; break; - default: ; - } // switch (stepNo) - // increase the step counter - stepNo++; - stepNo %= 8; - rightAfterEscape = FALSE; - // and look at the next char. - continue; - } // fi (still) shifted - } // fi shifted - - // - // if control reaches here, we either weren't in a - // shifted sequence or we just left one by seeing - // a non-base64-char. - // Either way, we have to process "ch" outside - // a shifted-sequence now: - // - if ( ch == '+' ) { - // '+' is the escape char for entering a - // shifted sequence: - shifted = TRUE; - stepNo = 0; - // also, we're right at the beginning where - // special rules apply: - rightAfterEscape = TRUE; - } else { - // US-ASCII values are directly used - result += TQChar(ch); - } - } - - return result; - - } // toUnicode() - -}; // class TQUtf7Decoder - -TQTextDecoder* TQUtf7Codec::makeDecoder() const -{ - return new TQUtf7Decoder; -} - - -class TQUtf7Encoder : public TQTextEncoder { - uchar dontNeedEncodingSet[16]; - ushort outbits; - uint stepNo : 2; - bool shifted : 1; - bool mayContinueShiftedSequence : 1; -public: - TQUtf7Encoder(bool encOpt, bool encLwsp) - : outbits(0), stepNo(0), - shifted(FALSE), mayContinueShiftedSequence(FALSE) - { - for ( int i = 0; i < 16 ; i++) { - dontNeedEncodingSet[i] = directSet[i]; - if (!encOpt) - dontNeedEncodingSet[i] |= optDirectSet[i]; - } - if(!encLwsp) { - dontNeedEncodingSet[' '/8] |= 0x80 >> (' '%8); - dontNeedEncodingSet['\n'/8] |= 0x80 >> ('\n'%8); - dontNeedEncodingSet['\r'/8] |= 0x80 >> ('\r'%8); - dontNeedEncodingSet['\t'/8] |= 0x80 >> ('\t'%8); - } - } - -private: - - char toBase64( ushort u ) { - if ( u < 26 ) - return (char)u + 'A'; - else if ( u < 52 ) - return (char)u - 26 + 'a'; - else if ( u < 62 ) - return (char)u - 52 + '0'; - else if ( u == 62 ) - return '+'; - else - return '/'; - } - - void addToShiftedSequence(TQCString::Iterator & t, ushort u) { - switch (stepNo) { - // no outbits; use uppermost 6 bits of u - case 0: - *t++ = toBase64( u >> 10 ); - *t++ = toBase64( (u & 0x03FF /* umask top 6 bits */ ) >> 4 ); - // save 4 lowest-order bits in outbits[5..2] - outbits = (u & 0x000F) << 2; - break; - - // outbits available; use top two bits of u to complete - // the previous char - case 1: - if (!mayContinueShiftedSequence) { - // if mayContinue, this char has already been written - *t++ = toBase64( outbits | ( u >> 14 ) ); - } - *t++ = toBase64( (u & 0x3F00 /* mask top 2 bits */ ) >> 8 ); - *t++ = toBase64( (u & 0x00FC /* mask msbyte */ ) >> 2 ); - // save 2 lowest-significant bits in outbits[5..4] - outbits = (u & 0x0003) << 4; - break; - - // outbits available; use top four bits of u to complete - // the previous char - case 2: - if (!mayContinueShiftedSequence) { - // if mayContinue, this char has already been written - *t++ = toBase64( outbits | ( u >> 12 ) ); - } - *t++ = toBase64( (u & 0x0FFF) >> 6 ); - *t++ = toBase64( u & 0x003F ); - break; - - default: ; - } - stepNo = (stepNo + 1) % 3; - } - - void endShiftedSequence(TQCString::Iterator & t) { - switch (stepNo) { - case 1: // four outbits still to be written - case 2: // two outbits still to be written - *t++ = toBase64( outbits ); - break; - case 0: // nothing to do - default: ; - } - outbits = 0; - } - - // depending on the stepNo, checks whether we can continue - // an already ended shifted-sequence with char "u". - // This is only possible if the topmost bits fit the - // already written ones (which are all 0 between calls) - bool continueOK( ushort u ) { - return stepNo == 0 || - ( stepNo == 1 && (u & 0xF000) == 0 ) || - ( stepNo == 2 && (u & 0xC000) == 0 ); - } - - void processDoesntNeedEncoding(TQCString::Iterator & t, ushort ch) { - // doesn't need encoding - if (shifted) { - endShiftedSequence(t); - // add "lead-out" to dis-ambiguate following chars: - if (isOfSet((char)ch,base64Set) || ch == '-' ) { - *t++ = '-'; - } - } else if (mayContinueShiftedSequence) { - // if mayContinue is set, this means the - // shifted-sequence needs a lead-out. - mayContinueShiftedSequence = FALSE; - if (isOfSet(ch,base64Set) || ch == '-' ) { - *t++ = '-'; - } - } - *t++ = (uchar)ch; - shifted = FALSE; - stepNo = 0; - } - -public: - TQCString fromUnicode(const TQString & uc, int & len_in_out) - { - // allocate place for worst case: - // len/2 * (5+1) for an alternating sequence of e.g. "A\", - // + 4 for a worst-case of another +ABC encoded char - // + 1 for the trailing \0 - // - int maxreslen = 3 * len_in_out + 5; - TQCString result( maxreslen ); - -#if 0 - // if (len_in_out == 1) { - cout << "\nlen_in_out: " << len_in_out - <<"; shifted: " << (shifted ? "true" : "false") - << ";\n" << "mayContinue: " - << (mayContinueShiftedSequence ? "true" : "false") - << "; stepNo: " << stepNo << ";\n" - << "outbits: " << outbits << endl; - // } -#endif - - // source and destination cursor - const TQChar * s = uc.tqunicode(); - TQCString::Iterator t = result.data(); - - if ( uc.isNull() ) { - // return to ascii requested: - if ( mayContinueShiftedSequence ) - *t++ = '-'; - } else { - // normal operation: - for (int i = 0 ; i < len_in_out ; - i++/*, checkOutBuf(result,maxreslen,t,i,len_in_out,5)*/ ) { - ushort ch = s[i].tqunicode(); - - // - // first, we check whether we might get around encoding: - // - if ( ch < 128 ) { - // - // ch is usAscii, so we have a chance that we don't - // need to encode it. - // - if ( isOfSet((uchar)ch,dontNeedEncodingSet) ) { - processDoesntNeedEncoding(t,ch); - continue; - } else if ( ch == '+' ) { - // '+' is the shift escape character - if (shifted || mayContinueShiftedSequence) { - // if we are already in shifted mode, we just - // encode the '+', too. Compare - // 24bits ("-+-") + some from ending the shifted-sequence - // with 21,33 bits - addToShiftedSequence(t,ch); - mayContinueShiftedSequence = FALSE; - shifted = TRUE; - } else { - // shortcut encoding of '+': - *t++ = '+'; - *t++ = '-'; - } - continue; // done - } // else fall through to encoding - } - // - // need encoding - // - if (!shifted && (!mayContinueShiftedSequence || !continueOK(ch) ) ) { - *t++ = '+'; - stepNo = 0; - } - addToShiftedSequence(t,ch); - shifted = TRUE; - mayContinueShiftedSequence = FALSE; - } - - if ( shifted ) { - endShiftedSequence(t); - mayContinueShiftedSequence = TRUE; - }; - shifted = FALSE; - } - - *t = '\0'; - len_in_out = t - result.data(); - -#if 0 - cout << "len_in_out: " << len_in_out << "; " - << "mayContinue: " << (mayContinueShiftedSequence ? "true" : "false") - << "; stepNo: " << stepNo << endl; -#endif - - Q_ASSERT(len_in_out <= maxreslen-1); - - return result; - } // fromUnicode() - -}; // class TQUtf7Encoder - -TQTextEncoder* TQUtf7Codec::makeEncoder() const { - return new TQUtf7Encoder( false, false ); -} - -TQTextEncoder* TQStrictUtf7Codec::makeEncoder() const { - return new TQUtf7Encoder( true, false ); -} - -#endif // TQT_NO_TEXTCODEC |