diff options
Diffstat (limited to 'tdeioslaves/imap4/rfcdecoder.cc')
-rw-r--r-- | tdeioslaves/imap4/rfcdecoder.cc | 668 |
1 files changed, 0 insertions, 668 deletions
diff --git a/tdeioslaves/imap4/rfcdecoder.cc b/tdeioslaves/imap4/rfcdecoder.cc deleted file mode 100644 index 08b641b7d..000000000 --- a/tdeioslaves/imap4/rfcdecoder.cc +++ /dev/null @@ -1,668 +0,0 @@ -/********************************************************************** - * - * rfcdecoder.cc - handler for various rfc/mime encodings - * Copyright (C) 2000 s.carstens@gmx.de - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Send comments and bug fixes to s.carstens@gmx.de - * - *********************************************************************/ -#include "rfcdecoder.h" - -#include <ctype.h> -#include <sys/types.h> - -#include <stdio.h> -#include <stdlib.h> - -#include <tqtextcodec.h> -#include <tqbuffer.h> -#include <tqregexp.h> -#include <kmdcodec.h> - -// This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997. -// adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000 - -static unsigned char base64chars[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; -#define UNDEFINED 64 -#define MAXLINE 76 - -/* UTF16 definitions */ -#define UTF16MASK 0x03FFUL -#define UTF16SHIFT 10 -#define UTF16BASE 0x10000UL -#define UTF16HIGHSTART 0xD800UL -#define UTF16HIGHEND 0xDBFFUL -#define UTF16LOSTART 0xDC00UL -#define UTF16LOEND 0xDFFFUL - -/* Convert an IMAP mailbox to a Unicode path - */ -TQString rfcDecoder::fromIMAP (const TQString & inSrc) -{ - unsigned char c, i, bitcount; - unsigned long ucs4, utf16, bitbuf; - unsigned char base64[256], utf8[6]; - unsigned long srcPtr = 0; - TQCString dst; - TQCString src = inSrc.ascii (); - uint srcLen = inSrc.length(); - - /* initialize modified base64 decoding table */ - memset (base64, UNDEFINED, sizeof (base64)); - for (i = 0; i < sizeof (base64chars); ++i) - { - base64[(int)base64chars[i]] = i; - } - - /* loop until end of string */ - while (srcPtr < srcLen) - { - c = src[srcPtr++]; - /* deal with literal characters and &- */ - if (c != '&' || src[srcPtr] == '-') - { - /* encode literally */ - dst += c; - /* skip over the '-' if this is an &- sequence */ - if (c == '&') - srcPtr++; - } - else - { - /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */ - bitbuf = 0; - bitcount = 0; - ucs4 = 0; - while ((c = base64[(unsigned char) src[srcPtr]]) != UNDEFINED) - { - ++srcPtr; - bitbuf = (bitbuf << 6) | c; - bitcount += 6; - /* enough bits for a UTF-16 character? */ - if (bitcount >= 16) - { - bitcount -= 16; - utf16 = (bitcount ? bitbuf >> bitcount : bitbuf) & 0xffff; - /* convert UTF16 to UCS4 */ - if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND) - { - ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT; - continue; - } - else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND) - { - ucs4 += utf16 - UTF16LOSTART + UTF16BASE; - } - else - { - ucs4 = utf16; - } - /* convert UTF-16 range of UCS4 to UTF-8 */ - if (ucs4 <= 0x7fUL) - { - utf8[0] = ucs4; - i = 1; - } - else if (ucs4 <= 0x7ffUL) - { - utf8[0] = 0xc0 | (ucs4 >> 6); - utf8[1] = 0x80 | (ucs4 & 0x3f); - i = 2; - } - else if (ucs4 <= 0xffffUL) - { - utf8[0] = 0xe0 | (ucs4 >> 12); - utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f); - utf8[2] = 0x80 | (ucs4 & 0x3f); - i = 3; - } - else - { - utf8[0] = 0xf0 | (ucs4 >> 18); - utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f); - utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f); - utf8[3] = 0x80 | (ucs4 & 0x3f); - i = 4; - } - /* copy it */ - for (c = 0; c < i; ++c) - { - dst += utf8[c]; - } - } - } - /* skip over trailing '-' in modified UTF-7 encoding */ - if (src[srcPtr] == '-') - ++srcPtr; - } - } - return TQString::fromUtf8 (dst.data ()); -} - -/* replace " with \" and \ with \\ " and \ characters */ -TQString rfcDecoder::quoteIMAP(const TQString &src) -{ - uint len = src.length(); - TQString result; - result.reserve(2 * len); - for (unsigned int i = 0; i < len; i++) - { - if (src[i] == '"' || src[i] == '\\') - result += '\\'; - result += src[i]; - } - //result.squeeze(); - unnecessary and slow - return result; -} - -/* Convert Unicode path to modified UTF-7 IMAP mailbox - */ -TQString rfcDecoder::toIMAP (const TQString & inSrc) -{ - unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag; - unsigned long ucs4, bitbuf; - TQCString src = inSrc.utf8 (); - TQString dst; - - ulong srcPtr = 0; - utf7mode = 0; - utf8total = 0; - bitstogo = 0; - utf8pos = 0; - bitbuf = 0; - ucs4 = 0; - while (srcPtr < src.length ()) - { - c = (unsigned char) src[srcPtr++]; - /* normal character? */ - if (c >= ' ' && c <= '~') - { - /* switch out of UTF-7 mode */ - if (utf7mode) - { - if (bitstogo) - { - dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F]; - bitstogo = 0; - } - dst += '-'; - utf7mode = 0; - } - dst += c; - /* encode '&' as '&-' */ - if (c == '&') - { - dst += '-'; - } - continue; - } - /* switch to UTF-7 mode */ - if (!utf7mode) - { - dst += '&'; - utf7mode = 1; - } - /* Encode US-ASCII characters as themselves */ - if (c < 0x80) - { - ucs4 = c; - utf8total = 1; - } - else if (utf8total) - { - /* save UTF8 bits into UCS4 */ - ucs4 = (ucs4 << 6) | (c & 0x3FUL); - if (++utf8pos < utf8total) - { - continue; - } - } - else - { - utf8pos = 1; - if (c < 0xE0) - { - utf8total = 2; - ucs4 = c & 0x1F; - } - else if (c < 0xF0) - { - utf8total = 3; - ucs4 = c & 0x0F; - } - else - { - /* NOTE: can't convert UTF8 sequences longer than 4 */ - utf8total = 4; - ucs4 = c & 0x03; - } - continue; - } - /* loop to split ucs4 into two utf16 chars if necessary */ - utf8total = 0; - do - { - if (ucs4 >= UTF16BASE) - { - ucs4 -= UTF16BASE; - bitbuf = (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART); - ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART; - utf16flag = 1; - } - else - { - bitbuf = (bitbuf << 16) | ucs4; - utf16flag = 0; - } - bitstogo += 16; - /* spew out base64 */ - while (bitstogo >= 6) - { - bitstogo -= 6; - dst += base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F]; - } - } - while (utf16flag); - } - /* if in UTF-7 mode, finish in ASCII */ - if (utf7mode) - { - if (bitstogo) - { - dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F]; - } - dst += '-'; - } - return quoteIMAP(dst); -} - -//----------------------------------------------------------------------------- -TQString rfcDecoder::decodeQuoting(const TQString &aStr) -{ - TQString result; - unsigned int strLength(aStr.length()); - for (unsigned int i = 0; i < strLength ; i++) - { - if (aStr[i] == "\\") i++; - result += aStr[i]; - } - return result; -} - -//----------------------------------------------------------------------------- -TQTextCodec * -rfcDecoder::codecForName (const TQString & _str) -{ - if (_str.isEmpty ()) - return NULL; - return TQTextCodec::codecForName (_str.lower (). - replace ("windows", "cp").latin1 ()); -} - -//----------------------------------------------------------------------------- -const TQString -rfcDecoder::decodeRFC2047String (const TQString & _str) -{ - TQString throw_away; - - return decodeRFC2047String (_str, throw_away); -} - -//----------------------------------------------------------------------------- -const TQString -rfcDecoder::decodeRFC2047String (const TQString & _str, TQString & charset) -{ - TQString throw_away; - - return decodeRFC2047String (_str, charset, throw_away); -} - -//----------------------------------------------------------------------------- -const TQString -rfcDecoder::decodeRFC2047String (const TQString & _str, TQString & charset, - TQString & language) -{ - //do we have a rfc string - if (_str.find("=?") < 0) - return _str; - - TQCString aStr = _str.ascii (); // TQString.length() means Unicode chars - TQCString result; - char *pos, *beg, *end, *mid = NULL; - TQCString str; - char encoding = 0, ch; - bool valid; - const int maxLen = 200; - int i; - -// result.truncate(aStr.length()); - for (pos = aStr.data (); *pos; pos++) - { - if (pos[0] != '=' || pos[1] != '?') - { - result += *pos; - continue; - } - beg = pos + 2; - end = beg; - valid = TRUE; - // parse charset name - for (i = 2, pos += 2; - i < maxLen && (*pos != '?' && (ispunct (*pos) || isalnum (*pos))); - i++) - pos++; - if (*pos != '?' || i < 4 || i >= maxLen) - valid = FALSE; - else - { - charset = TQCString (beg, i - 1); // -2 + 1 for the zero - int pt = charset.findRev('*'); - if (pt != -1) - { - // save language for later usage - language = charset.right (charset.length () - pt - 1); - - // tie off language as defined in rfc2047 - charset.truncate(pt); - } - // get encoding and check delimiting question marks - encoding = toupper (pos[1]); - if (pos[2] != '?' - || (encoding != 'Q' && encoding != 'B' && encoding != 'q' - && encoding != 'b')) - valid = FALSE; - pos += 3; - i += 3; -// kdDebug(7116) << "rfcDecoder::decodeRFC2047String - charset " << charset << " - language " << language << " - '" << pos << "'" << endl; - } - if (valid) - { - mid = pos; - // search for end of encoded part - while (i < maxLen && *pos && !(*pos == '?' && *(pos + 1) == '=')) - { - i++; - pos++; - } - end = pos + 2; //end now points to the first char after the encoded string - if (i >= maxLen || !*pos) - valid = FALSE; - } - if (valid) - { - ch = *pos; - *pos = '\0'; - str = TQCString (mid).left ((int) (mid - pos - 1)); - if (encoding == 'Q') - { - // decode quoted printable text - for (i = str.length () - 1; i >= 0; i--) - if (str[i] == '_') - str[i] = ' '; -// kdDebug(7116) << "rfcDecoder::decodeRFC2047String - before QP '" << str << "'" << endl; - - str = KCodecs::quotedPrintableDecode(str); -// kdDebug(7116) << "rfcDecoder::decodeRFC2047String - after QP '" << str << "'" << endl; - } - else - { - // decode base64 text - str = KCodecs::base64Decode(str); - } - *pos = ch; - int len = str.length(); - for (i = 0; i < len; i++) - result += (char) (TQChar) str[i]; - - pos = end - 1; - } - else - { -// kdDebug(7116) << "rfcDecoder::decodeRFC2047String - invalid" << endl; - //result += "=?"; - //pos = beg -1; // because pos gets increased shortly afterwards - pos = beg - 2; - result += *pos++; - result += *pos; - } - } - if (!charset.isEmpty ()) - { - TQTextCodec *aCodec = codecForName (charset.ascii ()); - if (aCodec) - { -// kdDebug(7116) << "Codec is " << aCodec->name() << endl; - return aCodec->toUnicode (result); - } - } - return result; -} - - -//----------------------------------------------------------------------------- -const char especials[17] = "()<>@,;:\"/[]?.= "; - -const TQString -rfcDecoder::encodeRFC2047String (const TQString & _str) -{ - if (_str.isEmpty ()) - return _str; - const signed char *latin = reinterpret_cast<const signed char *>(_str.latin1()), *l, *start, *stop; - char hexcode; - int numQuotes, i; - int rptr = 0; - // My stats show this number results in 12 resize() out of 73,000 - int resultLen = 3 * _str.length() / 2; - TQCString result(resultLen); - - while (*latin) - { - l = latin; - start = latin; - while (*l) - { - if (*l == 32) - start = l + 1; - if (*l < 0) - break; - l++; - } - if (*l) - { - numQuotes = 1; - while (*l) - { - /* The encoded word must be limited to 75 character */ - for (i = 0; i < 16; i++) - if (*l == especials[i]) - numQuotes++; - if (*l < 0) - numQuotes++; - /* Stop after 58 = 75 - 17 characters or at "<user@host..." */ - if (l - start + 2 * numQuotes >= 58 || *l == 60) - break; - l++; - } - if (*l) - { - stop = l - 1; - while (stop >= start && *stop != 32) - stop--; - if (stop <= start) - stop = l; - } - else - stop = l; - if (resultLen - rptr - 1 <= start - latin + 1 + 16 /* =?iso-88... */) { - resultLen += (start - latin + 1) * 2 + 20; // more space - result.resize(resultLen); - } - while (latin < start) - { - result[rptr++] = *latin; - latin++; - } - strcpy(&result[rptr], "=?iso-8859-1?q?"); rptr += 15; - if (resultLen - rptr - 1 <= 3*(stop - latin + 1)) { - resultLen += (stop - latin + 1) * 4 + 20; // more space - result.resize(resultLen); - } - while (latin < stop) // can add up to 3 chars/iteration - { - numQuotes = 0; - for (i = 0; i < 16; i++) - if (*latin == especials[i]) - numQuotes = 1; - if (*latin < 0) - numQuotes = 1; - if (numQuotes) - { - result[rptr++] = '='; - hexcode = ((*latin & 0xF0) >> 4) + 48; - if (hexcode >= 58) - hexcode += 7; - result[rptr++] = hexcode; - hexcode = (*latin & 0x0F) + 48; - if (hexcode >= 58) - hexcode += 7; - result[rptr++] = hexcode; - } - else - { - result[rptr++] = *latin; - } - latin++; - } - result[rptr++] = '?'; - result[rptr++] = '='; - } - else - { - while (*latin) - { - if (rptr == resultLen - 1) { - resultLen += 30; - result.resize(resultLen); - } - result[rptr++] = *latin; - latin++; - } - } - } - result[rptr] = 0; - //free (latinStart); - return result; -} - - -//----------------------------------------------------------------------------- -const TQString -rfcDecoder::encodeRFC2231String (const TQString & _str) -{ - if (_str.isEmpty ()) - return _str; - signed char *latin = (signed char *) calloc (1, _str.length () + 1); - char *latin_us = (char *) latin; - strcpy (latin_us, _str.latin1 ()); - signed char *l = latin; - char hexcode; - int i; - bool quote; - while (*l) - { - if (*l < 0) - break; - l++; - } - if (!*l) { - free(latin); - return _str.ascii (); - } - TQCString result; - l = latin; - while (*l) - { - quote = *l < 0; - for (i = 0; i < 16; i++) - if (*l == especials[i]) - quote = true; - if (quote) - { - result += "%"; - hexcode = ((*l & 0xF0) >> 4) + 48; - if (hexcode >= 58) - hexcode += 7; - result += hexcode; - hexcode = (*l & 0x0F) + 48; - if (hexcode >= 58) - hexcode += 7; - result += hexcode; - } - else - { - result += *l; - } - l++; - } - free (latin); - return result; -} - - -//----------------------------------------------------------------------------- -const TQString -rfcDecoder::decodeRFC2231String (const TQString & _str) -{ - int p = _str.find ('\''); - - //see if it is an rfc string - if (p < 0) - return _str; - - int l = _str.findRev ('\''); - - //second is language - if (p >= l) - return _str; - - //first is charset or empty - TQString charset = _str.left (p); - TQString st = _str.mid (l + 1); - TQString language = _str.mid (p + 1, l - p - 1); - - //kdDebug(7116) << "Charset: " << charset << " Language: " << language << endl; - - char ch, ch2; - p = 0; - while (p < (int) st.length ()) - { - if (st.at (p) == 37) - { - ch = st.at (p + 1).latin1 () - 48; - if (ch > 16) - ch -= 7; - ch2 = st.at (p + 2).latin1 () - 48; - if (ch2 > 16) - ch2 -= 7; - st.at (p) = ch * 16 + ch2; - st.remove (p + 1, 2); - } - p++; - } - return st; -} |