decodeRFC2047.cpp

00001 /***************************************************************************
00002                           decodeRFC2047.cpp  -  description
00003                              -------------------
00004     begin                : Mon Jan 28 2002
00005     copyright            : (C) 2002 by Eggert Ehmke
00006     email                : eggert.ehmke@berlin.de
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  *                                                                         *
00011  *   This program is free software; you can redistribute it and/or modify  *
00012  *   it under the terms of the GNU General Public License as published by  *
00013  *   the Free Software Foundation; either version 2 of the License, or     *
00014  *   (at your option) any later version.                                   *
00015  *                                                                         *
00016  ***************************************************************************/
00017 
00018 
00019 #include "decodeRFC2047.h"
00020 
00021 /*
00022  * These functions have been adapted from the KMail program
00023  */
00024 
00025 TQCString decodeQuotedPrintable(const TQCString& aStr)
00026 {
00027   TQCString bStr = aStr;
00028   if (aStr.isNull())
00029     bStr = "";
00030 
00031   DwString dwsrc(bStr.data());
00032   DwString dwdest;
00033 
00034   DwDecodeQuotedPrintable(dwsrc, dwdest);
00035   return dwdest.c_str();
00036 }
00037 
00038 TQCString decodeBase64(const TQCString& aStr)
00039 {
00040   TQCString bStr = aStr;
00041   if (aStr.isNull())
00042     bStr = "";
00043   while (bStr.length() < 16) bStr += "=";
00044 
00045   DwString dwsrc(bStr.data(), bStr.length());
00046   DwString dwdest;
00047   TQCString result;
00048 
00049   DwDecodeBase64(dwsrc, dwdest);
00050   result = dwdest.c_str();
00051   return result;
00052 }
00053 
00054 TQTextCodec* codecForName(const TQCString& _str)
00055 {
00056   if (_str.isEmpty()) return NULL;
00057   if (_str.lower() == "shift_jis" || _str.lower() == "shift-jis")
00058     return TQTextCodec::codecForName("sjis");
00059   return TQTextCodec::codecForName(_str.lower().replace(
00060     TQRegExp("windows"), "cp") );
00061 }
00062 
00063 TQString Codecs::decodeRFC2047(const TQCString& aStr)
00064 {
00065   TQString result;
00066   TQCString charset;
00067   char *pos, *beg, *end, *mid;
00068   TQCString str, cstr, LWSP_buffer;
00069   char encoding, ch;
00070   bool valid, lastWasEncodedWord=FALSE;
00071   const int maxLen=200;
00072   int i;
00073 
00074   if (aStr.find("=?") < 0)
00075     return TQString::fromLocal8Bit(aStr).replace(TQRegExp("\n[\t ]")," ");
00076 
00077   for (pos=aStr.data(); *pos; pos++)
00078   {
00079     // line unfolding
00080     if ( pos[0] == '\r' && pos[1] == '\n' ) {
00081       pos++;
00082       continue;
00083     }
00084     if ( pos[0] == '\n' )
00085       continue;
00086     // collect LWSP after encoded-words,
00087     // because we might need to throw it out
00088     // (when the next word is an encoded-word)
00089     if ( lastWasEncodedWord && ( pos[0] == ' ' || pos[0] == '\t' ) )
00090     {
00091       LWSP_buffer += pos[0];
00092       continue;
00093     }
00094     // verbatimly copy normal text
00095     if (pos[0]!='=' || pos[1]!='?')
00096     {
00097       result += LWSP_buffer + pos[0];
00098       LWSP_buffer = 0;
00099       lastWasEncodedWord = FALSE;
00100       continue;
00101     }
00102     // found possible encoded-word
00103     beg = pos+2;
00104     end = beg;
00105     valid = TRUE;
00106     // parse charset name
00107     charset = "";
00108     for (i=2,pos+=2; i<maxLen && (*pos!='?'&&(*pos==' '||ispunct(*pos)||isalnum(*pos))); i++)
00109     {
00110       charset += *pos;
00111       pos++;
00112     }
00113     if (*pos!='?' || i<4 || i>=maxLen) valid = FALSE;
00114     else
00115     {
00116       // get encoding and check delimiting question marks
00117       encoding = toupper(pos[1]);
00118       if (pos[2]!='?' || (encoding!='Q' && encoding!='B'))
00119                             valid = FALSE;
00120       pos+=3;
00121       i+=3;
00122     }
00123     if (valid)
00124     {
00125       mid = pos;
00126       // search for end of encoded part
00127       while (i<maxLen && *pos && !(*pos=='?' && *(pos+1)=='='))
00128       {
00129                             i++;
00130                             pos++;
00131       }
00132       end = pos+2;//end now points to the first char after the encoded string
00133       if (i>=maxLen || !*pos)
00134        valid = FALSE;
00135     }
00136     if (valid)
00137     {
00138       // valid encoding: decode and throw away separating LWSP
00139       ch = *pos;
00140       *pos = '\0';
00141       str = TQCString(mid).left((int)(mid - pos - 1));
00142       if (encoding == 'Q')
00143       {
00144                             // decode quoted printable text
00145                             for (i=str.length()-1; i>=0; i--)
00146                      if (str[i]=='_')
00147                             str[i]=' ';
00148                             cstr = decodeQuotedPrintable(str);
00149       }
00150       else
00151       {
00152                             // decode base64 text
00153                             cstr = decodeBase64(str);
00154       }
00155       TQTextCodec *codec = codecForName(charset);
00156       if (!codec)
00157        codec = codecForName(TDEGlobal::locale()->encoding());
00158       if (codec)
00159        result += codec->toUnicode(cstr);
00160       else
00161        result += TQString::fromLocal8Bit(cstr);
00162       lastWasEncodedWord = TRUE;
00163 
00164       *pos = ch;
00165       pos = end -1;
00166     }
00167     else
00168     {
00169       // invalid encoding, keep separating LWSP.
00170       //result += "=?";
00171       //pos = beg -1; // because pos gets increased shortly afterwards
00172       pos = beg - 2;
00173       result += LWSP_buffer;
00174       result += *pos++;
00175       result += *pos;
00176       lastWasEncodedWord = FALSE;
00177     }
00178     LWSP_buffer = 0;
00179   }
00180   return result;
00181 }
00182 

Generated on Thu Jul 5 19:36:06 2007 for kshowmail by  doxygen 1.5.0