diff options
Diffstat (limited to 'kioslaves/imap4/rfcdecoder.cc')
-rw-r--r-- | kioslaves/imap4/rfcdecoder.cc | 668 |
1 files changed, 668 insertions, 0 deletions
diff --git a/kioslaves/imap4/rfcdecoder.cc b/kioslaves/imap4/rfcdecoder.cc new file mode 100644 index 000000000..0e2bc9f73 --- /dev/null +++ b/kioslaves/imap4/rfcdecoder.cc @@ -0,0 +1,668 @@ +/********************************************************************** + * + * rfcdecoder.cc - handler for various rfc/mime encodings + * Copyright (C) 2000 s.carstens@gmx.de + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Send comments and bug fixes to s.carstens@gmx.de + * + *********************************************************************/ +#include "rfcdecoder.h" + +#include <ctype.h> +#include <sys/types.h> + +#include <stdio.h> +#include <stdlib.h> + +#include <qtextcodec.h> +#include <qbuffer.h> +#include <qregexp.h> +#include <kmdcodec.h> + +// This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997. +// adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000 + +static unsigned char base64chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; +#define UNDEFINED 64 +#define MAXLINE 76 + +/* UTF16 definitions */ +#define UTF16MASK 0x03FFUL +#define UTF16SHIFT 10 +#define UTF16BASE 0x10000UL +#define UTF16HIGHSTART 0xD800UL +#define UTF16HIGHEND 0xDBFFUL +#define UTF16LOSTART 0xDC00UL +#define UTF16LOEND 0xDFFFUL + +/* Convert an IMAP mailbox to a Unicode path + */ +QString rfcDecoder::fromIMAP (const QString & inSrc) +{ + unsigned char c, i, bitcount; + unsigned long ucs4, utf16, bitbuf; + unsigned char base64[256], utf8[6]; + unsigned long srcPtr = 0; + QCString dst; + QCString src = inSrc.ascii (); + uint srcLen = inSrc.length(); + + /* initialize modified base64 decoding table */ + memset (base64, UNDEFINED, sizeof (base64)); + for (i = 0; i < sizeof (base64chars); ++i) + { + base64[(int)base64chars[i]] = i; + } + + /* loop until end of string */ + while (srcPtr < srcLen) + { + c = src[srcPtr++]; + /* deal with literal characters and &- */ + if (c != '&' || src[srcPtr] == '-') + { + /* encode literally */ + dst += c; + /* skip over the '-' if this is an &- sequence */ + if (c == '&') + srcPtr++; + } + else + { + /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */ + bitbuf = 0; + bitcount = 0; + ucs4 = 0; + while ((c = base64[(unsigned char) src[srcPtr]]) != UNDEFINED) + { + ++srcPtr; + bitbuf = (bitbuf << 6) | c; + bitcount += 6; + /* enough bits for a UTF-16 character? */ + if (bitcount >= 16) + { + bitcount -= 16; + utf16 = (bitcount ? bitbuf >> bitcount : bitbuf) & 0xffff; + /* convert UTF16 to UCS4 */ + if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND) + { + ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT; + continue; + } + else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND) + { + ucs4 += utf16 - UTF16LOSTART + UTF16BASE; + } + else + { + ucs4 = utf16; + } + /* convert UTF-16 range of UCS4 to UTF-8 */ + if (ucs4 <= 0x7fUL) + { + utf8[0] = ucs4; + i = 1; + } + else if (ucs4 <= 0x7ffUL) + { + utf8[0] = 0xc0 | (ucs4 >> 6); + utf8[1] = 0x80 | (ucs4 & 0x3f); + i = 2; + } + else if (ucs4 <= 0xffffUL) + { + utf8[0] = 0xe0 | (ucs4 >> 12); + utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f); + utf8[2] = 0x80 | (ucs4 & 0x3f); + i = 3; + } + else + { + utf8[0] = 0xf0 | (ucs4 >> 18); + utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f); + utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f); + utf8[3] = 0x80 | (ucs4 & 0x3f); + i = 4; + } + /* copy it */ + for (c = 0; c < i; ++c) + { + dst += utf8[c]; + } + } + } + /* skip over trailing '-' in modified UTF-7 encoding */ + if (src[srcPtr] == '-') + ++srcPtr; + } + } + return QString::fromUtf8 (dst.data ()); +} + +/* replace " with \" and \ with \\ " and \ characters */ +QString rfcDecoder::quoteIMAP(const QString &src) +{ + uint len = src.length(); + QString result; + result.reserve(2 * len); + for (unsigned int i = 0; i < len; i++) + { + if (src[i] == '"' || src[i] == '\\') + result += '\\'; + result += src[i]; + } + //result.squeeze(); - unnecessary and slow + return result; +} + +/* Convert Unicode path to modified UTF-7 IMAP mailbox + */ +QString rfcDecoder::toIMAP (const QString & inSrc) +{ + unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag; + unsigned long ucs4, bitbuf; + QCString src = inSrc.utf8 (); + QString dst; + + ulong srcPtr = 0; + utf7mode = 0; + utf8total = 0; + bitstogo = 0; + utf8pos = 0; + bitbuf = 0; + ucs4 = 0; + while (srcPtr < src.length ()) + { + c = (unsigned char) src[srcPtr++]; + /* normal character? */ + if (c >= ' ' && c <= '~') + { + /* switch out of UTF-7 mode */ + if (utf7mode) + { + if (bitstogo) + { + dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F]; + bitstogo = 0; + } + dst += '-'; + utf7mode = 0; + } + dst += c; + /* encode '&' as '&-' */ + if (c == '&') + { + dst += '-'; + } + continue; + } + /* switch to UTF-7 mode */ + if (!utf7mode) + { + dst += '&'; + utf7mode = 1; + } + /* Encode US-ASCII characters as themselves */ + if (c < 0x80) + { + ucs4 = c; + utf8total = 1; + } + else if (utf8total) + { + /* save UTF8 bits into UCS4 */ + ucs4 = (ucs4 << 6) | (c & 0x3FUL); + if (++utf8pos < utf8total) + { + continue; + } + } + else + { + utf8pos = 1; + if (c < 0xE0) + { + utf8total = 2; + ucs4 = c & 0x1F; + } + else if (c < 0xF0) + { + utf8total = 3; + ucs4 = c & 0x0F; + } + else + { + /* NOTE: can't convert UTF8 sequences longer than 4 */ + utf8total = 4; + ucs4 = c & 0x03; + } + continue; + } + /* loop to split ucs4 into two utf16 chars if necessary */ + utf8total = 0; + do + { + if (ucs4 >= UTF16BASE) + { + ucs4 -= UTF16BASE; + bitbuf = (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART); + ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART; + utf16flag = 1; + } + else + { + bitbuf = (bitbuf << 16) | ucs4; + utf16flag = 0; + } + bitstogo += 16; + /* spew out base64 */ + while (bitstogo >= 6) + { + bitstogo -= 6; + dst += base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F]; + } + } + while (utf16flag); + } + /* if in UTF-7 mode, finish in ASCII */ + if (utf7mode) + { + if (bitstogo) + { + dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F]; + } + dst += '-'; + } + return quoteIMAP(dst); +} + +//----------------------------------------------------------------------------- +QString rfcDecoder::decodeQuoting(const QString &aStr) +{ + QString result; + unsigned int strLength(aStr.length()); + for (unsigned int i = 0; i < strLength ; i++) + { + if (aStr[i] == "\\") i++; + result += aStr[i]; + } + return result; +} + +//----------------------------------------------------------------------------- +QTextCodec * +rfcDecoder::codecForName (const QString & _str) +{ + if (_str.isEmpty ()) + return NULL; + return QTextCodec::codecForName (_str.lower (). + replace ("windows", "cp").latin1 ()); +} + +//----------------------------------------------------------------------------- +const QString +rfcDecoder::decodeRFC2047String (const QString & _str) +{ + QString throw_away; + + return decodeRFC2047String (_str, throw_away); +} + +//----------------------------------------------------------------------------- +const QString +rfcDecoder::decodeRFC2047String (const QString & _str, QString & charset) +{ + QString throw_away; + + return decodeRFC2047String (_str, charset, throw_away); +} + +//----------------------------------------------------------------------------- +const QString +rfcDecoder::decodeRFC2047String (const QString & _str, QString & charset, + QString & language) +{ + //do we have a rfc string + if (_str.find("=?") < 0) + return _str; + + QCString aStr = _str.ascii (); // QString.length() means Unicode chars + QCString result; + char *pos, *beg, *end, *mid = NULL; + QCString str; + char encoding = 0, ch; + bool valid; + const int maxLen = 200; + int i; + +// result.truncate(aStr.length()); + for (pos = aStr.data (); *pos; pos++) + { + if (pos[0] != '=' || pos[1] != '?') + { + result += *pos; + continue; + } + beg = pos + 2; + end = beg; + valid = TRUE; + // parse charset name + for (i = 2, pos += 2; + i < maxLen && (*pos != '?' && (ispunct (*pos) || isalnum (*pos))); + i++) + pos++; + if (*pos != '?' || i < 4 || i >= maxLen) + valid = FALSE; + else + { + charset = QCString (beg, i - 1); // -2 + 1 for the zero + int pt = charset.findRev('*'); + if (pt != -1) + { + // save language for later usage + language = charset.right (charset.length () - pt - 1); + + // tie off language as defined in rfc2047 + charset.truncate(pt); + } + // get encoding and check delimiting question marks + encoding = toupper (pos[1]); + if (pos[2] != '?' + || (encoding != 'Q' && encoding != 'B' && encoding != 'q' + && encoding != 'b')) + valid = FALSE; + pos += 3; + i += 3; +// kdDebug(7116) << "rfcDecoder::decodeRFC2047String - charset " << charset << " - language " << language << " - '" << pos << "'" << endl; + } + if (valid) + { + mid = pos; + // search for end of encoded part + while (i < maxLen && *pos && !(*pos == '?' && *(pos + 1) == '=')) + { + i++; + pos++; + } + end = pos + 2; //end now points to the first char after the encoded string + if (i >= maxLen || !*pos) + valid = FALSE; + } + if (valid) + { + ch = *pos; + *pos = '\0'; + str = QCString (mid).left ((int) (mid - pos - 1)); + if (encoding == 'Q') + { + // decode quoted printable text + for (i = str.length () - 1; i >= 0; i--) + if (str[i] == '_') + str[i] = ' '; +// kdDebug(7116) << "rfcDecoder::decodeRFC2047String - before QP '" << str << "'" << endl; + + str = KCodecs::quotedPrintableDecode(str); +// kdDebug(7116) << "rfcDecoder::decodeRFC2047String - after QP '" << str << "'" << endl; + } + else + { + // decode base64 text + str = KCodecs::base64Decode(str); + } + *pos = ch; + int len = str.length(); + for (i = 0; i < len; i++) + result += (char) (QChar) str[i]; + + pos = end - 1; + } + else + { +// kdDebug(7116) << "rfcDecoder::decodeRFC2047String - invalid" << endl; + //result += "=?"; + //pos = beg -1; // because pos gets increased shortly afterwards + pos = beg - 2; + result += *pos++; + result += *pos; + } + } + if (!charset.isEmpty ()) + { + QTextCodec *aCodec = codecForName (charset.ascii ()); + if (aCodec) + { +// kdDebug(7116) << "Codec is " << aCodec->name() << endl; + return aCodec->toUnicode (result); + } + } + return result; +} + + +//----------------------------------------------------------------------------- +const char especials[17] = "()<>@,;:\"/[]?.= "; + +const QString +rfcDecoder::encodeRFC2047String (const QString & _str) +{ + if (_str.isEmpty ()) + return _str; + const signed char *latin = reinterpret_cast<const signed char *>(_str.latin1()), *l, *start, *stop; + char hexcode; + int numQuotes, i; + int rptr = 0; + // My stats show this number results in 12 resize() out of 73,000 + int resultLen = 3 * _str.length() / 2; + QCString result(resultLen); + + while (*latin) + { + l = latin; + start = latin; + while (*l) + { + if (*l == 32) + start = l + 1; + if (*l < 0) + break; + l++; + } + if (*l) + { + numQuotes = 1; + while (*l) + { + /* The encoded word must be limited to 75 character */ + for (i = 0; i < 16; i++) + if (*l == especials[i]) + numQuotes++; + if (*l < 0) + numQuotes++; + /* Stop after 58 = 75 - 17 characters or at "<user@host..." */ + if (l - start + 2 * numQuotes >= 58 || *l == 60) + break; + l++; + } + if (*l) + { + stop = l - 1; + while (stop >= start && *stop != 32) + stop--; + if (stop <= start) + stop = l; + } + else + stop = l; + if (resultLen - rptr - 1 <= start - latin + 1 + 16 /* =?iso-88... */) { + resultLen += (start - latin + 1) * 2 + 20; // more space + result.resize(resultLen); + } + while (latin < start) + { + result[rptr++] = *latin; + latin++; + } + strcpy(&result[rptr], "=?iso-8859-1?q?"); rptr += 15; + if (resultLen - rptr - 1 <= 3*(stop - latin + 1)) { + resultLen += (stop - latin + 1) * 4 + 20; // more space + result.resize(resultLen); + } + while (latin < stop) // can add up to 3 chars/iteration + { + numQuotes = 0; + for (i = 0; i < 16; i++) + if (*latin == especials[i]) + numQuotes = 1; + if (*latin < 0) + numQuotes = 1; + if (numQuotes) + { + result[rptr++] = '='; + hexcode = ((*latin & 0xF0) >> 4) + 48; + if (hexcode >= 58) + hexcode += 7; + result[rptr++] = hexcode; + hexcode = (*latin & 0x0F) + 48; + if (hexcode >= 58) + hexcode += 7; + result[rptr++] = hexcode; + } + else + { + result[rptr++] = *latin; + } + latin++; + } + result[rptr++] = '?'; + result[rptr++] = '='; + } + else + { + while (*latin) + { + if (rptr == resultLen - 1) { + resultLen += 30; + result.resize(resultLen); + } + result[rptr++] = *latin; + latin++; + } + } + } + result[rptr] = 0; + //free (latinStart); + return result; +} + + +//----------------------------------------------------------------------------- +const QString +rfcDecoder::encodeRFC2231String (const QString & _str) +{ + if (_str.isEmpty ()) + return _str; + signed char *latin = (signed char *) calloc (1, _str.length () + 1); + char *latin_us = (char *) latin; + strcpy (latin_us, _str.latin1 ()); + signed char *l = latin; + char hexcode; + int i; + bool quote; + while (*l) + { + if (*l < 0) + break; + l++; + } + if (!*l) { + free(latin); + return _str.ascii (); + } + QCString result; + l = latin; + while (*l) + { + quote = *l < 0; + for (i = 0; i < 16; i++) + if (*l == especials[i]) + quote = true; + if (quote) + { + result += "%"; + hexcode = ((*l & 0xF0) >> 4) + 48; + if (hexcode >= 58) + hexcode += 7; + result += hexcode; + hexcode = (*l & 0x0F) + 48; + if (hexcode >= 58) + hexcode += 7; + result += hexcode; + } + else + { + result += *l; + } + l++; + } + free (latin); + return result; +} + + +//----------------------------------------------------------------------------- +const QString +rfcDecoder::decodeRFC2231String (const QString & _str) +{ + int p = _str.find ('\''); + + //see if it is an rfc string + if (p < 0) + return _str; + + int l = _str.findRev ('\''); + + //second is language + if (p >= l) + return _str; + + //first is charset or empty + QString charset = _str.left (p); + QString st = _str.mid (l + 1); + QString language = _str.mid (p + 1, l - p - 1); + + //kdDebug(7116) << "Charset: " << charset << " Language: " << language << endl; + + char ch, ch2; + p = 0; + while (p < (int) st.length ()) + { + if (st.at (p) == 37) + { + ch = st.at (p + 1).latin1 () - 48; + if (ch > 16) + ch -= 7; + ch2 = st.at (p + 2).latin1 () - 48; + if (ch2 > 16) + ch2 -= 7; + st.at (p) = ch * 16 + ch2; + st.remove (p + 1, 2); + } + p++; + } + return st; +} |